data-processing-example/data-processing.ipynb

506 lines
147 KiB
Text
Raw Normal View History

2024-05-20 10:19:55 +00:00
{
"cells": [
2024-05-26 12:06:52 +00:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Musterbeispiel Data Processing\n",
"\n",
"Notwendige Packages befinden sich in `requirements.txt` - installieren mit `pip install -r requirements.txt`.\n",
2024-05-27 07:08:01 +00:00
"\n",
2024-05-26 12:06:52 +00:00
"Die Verwendung eines Virtual Environments (venv) wird empfohlen (`py -m venv .venv`, dann `.venv/scripts/activate` bzw. `.venv/bin/activate`)"
]
},
2024-05-20 10:19:55 +00:00
{
"cell_type": "code",
2024-05-27 13:38:21 +00:00
"execution_count": 1,
2024-05-20 10:19:55 +00:00
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"import numpy as np"
]
},
2024-05-27 07:26:15 +00:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Beschreibung des Datensatzes\n",
"\n",
"- Datensatz beinhaltet Daten der Wetterstation Freiburg des Deutschen Wetterdienstes\n",
"- Datensatz ist csv mit Trennzeichen `;`\n",
"- Datensatz umfasst 6 Spalten:\n",
"\n",
"\n",
"| | STATIONS_ID | MESS_DATUM | QN_4 | TT_TER | RF_TER | eor |\n",
"|--------------------|-----------------------|------------------------------|----------------------|-----------------------|----------------------------|------------|\n",
"| Beschreibung | ID der Wetterstation | Datum und Stunde der Messung | Qualität der Messung | Temperatur | Relative Luftfeuchtigkeit | End of Row |\n",
"| Datentyp | Integer | String (Format yyyymmddhh) | Integer | Float | Float | String |\n",
"| Fehlende Einträge | Nein | Nein | Nein | Ja (-999) | Ja (-999) | Nein |\n",
"| Relevant | Nein | Ja | Teilweise | Ja | Ja | Nein |\n",
"\n",
"\n",
"## Schritte der Datenverarbeitung\n",
"\n",
"1. Einlesen des Datensatzes\n",
"2. Konvertierung der Datumsspalte in datetime-Objekte\n",
"3. Entfernen nicht benötigter Spalten\n",
"4. Plotten der Daten für bessere Übersicht\n",
"5. Identifizieren und Kompensieren von Fehlerwerten durch Interpolation\n",
"6. Plotten des Ergebnisses"
]
},
2024-05-20 10:19:55 +00:00
{
"cell_type": "code",
2024-05-27 13:38:21 +00:00
"execution_count": 2,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-27 13:38:21 +00:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>STATIONS_ID</th>\n",
" <th>MESS_DATUM</th>\n",
" <th>QN_4</th>\n",
" <th>TT_TER</th>\n",
" <th>RF_TER</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1939.0</td>\n",
" <td>1.939000e+03</td>\n",
" <td>1939.000000</td>\n",
" <td>1939.000000</td>\n",
" <td>1939.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1443.0</td>\n",
" <td>2.023268e+09</td>\n",
" <td>5.121712</td>\n",
" <td>-1.439144</td>\n",
" <td>58.563177</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.0</td>\n",
" <td>5.373317e+05</td>\n",
" <td>3.262451</td>\n",
" <td>114.311663</td>\n",
" <td>122.156135</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1443.0</td>\n",
" <td>2.022111e+09</td>\n",
" <td>1.000000</td>\n",
" <td>-999.000000</td>\n",
" <td>-999.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1443.0</td>\n",
" <td>2.023042e+09</td>\n",
" <td>3.000000</td>\n",
" <td>6.200000</td>\n",
" <td>59.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1443.0</td>\n",
" <td>2.023092e+09</td>\n",
" <td>3.000000</td>\n",
" <td>10.900000</td>\n",
" <td>74.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1443.0</td>\n",
" <td>2.024012e+09</td>\n",
" <td>9.000000</td>\n",
" <td>16.600000</td>\n",
" <td>87.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>1443.0</td>\n",
" <td>2.024052e+09</td>\n",
" <td>9.000000</td>\n",
" <td>34.300000</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" STATIONS_ID MESS_DATUM QN_4 TT_TER RF_TER\n",
"count 1939.0 1.939000e+03 1939.000000 1939.000000 1939.000000\n",
"mean 1443.0 2.023268e+09 5.121712 -1.439144 58.563177\n",
"std 0.0 5.373317e+05 3.262451 114.311663 122.156135\n",
"min 1443.0 2.022111e+09 1.000000 -999.000000 -999.000000\n",
"25% 1443.0 2.023042e+09 3.000000 6.200000 59.000000\n",
"50% 1443.0 2.023092e+09 3.000000 10.900000 74.000000\n",
"75% 1443.0 2.024012e+09 9.000000 16.600000 87.000000\n",
"max 1443.0 2.024052e+09 9.000000 34.300000 100.000000"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
2024-05-20 10:19:55 +00:00
"source": [
"# Load the weather data. The CSV is expected next to this notebook\n",
"# (the relative path is resolved against the current working directory).\n",
"datafile = pathlib.Path(\"wetterdaten_freiburg_2022_2024.csv\")\n",
"data = pd.read_csv(datafile, delimiter=\";\")\n",
"\n",
"# Show summary statistics of the loaded dataframe\n",
"data.describe()"
]
},
{
"cell_type": "code",
2024-05-27 13:38:21 +00:00
"execution_count": 3,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-27 13:38:21 +00:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MESS_DATUM</th>\n",
" <th>TT_TER</th>\n",
" <th>RF_TER</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1939</td>\n",
" <td>1939.000000</td>\n",
" <td>1939.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2023-09-03 09:01:23.548220672</td>\n",
" <td>-1.439144</td>\n",
" <td>58.563177</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>2022-11-13 06:00:00</td>\n",
" <td>-999.000000</td>\n",
" <td>-999.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>2023-04-23 15:00:00</td>\n",
" <td>6.200000</td>\n",
" <td>59.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2023-09-16 12:00:00</td>\n",
" <td>10.900000</td>\n",
" <td>74.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2024-01-15 15:00:00</td>\n",
" <td>16.600000</td>\n",
" <td>87.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2024-05-15 18:00:00</td>\n",
" <td>34.300000</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>114.311663</td>\n",
" <td>122.156135</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" MESS_DATUM TT_TER RF_TER\n",
"count 1939 1939.000000 1939.000000\n",
"mean 2023-09-03 09:01:23.548220672 -1.439144 58.563177\n",
"min 2022-11-13 06:00:00 -999.000000 -999.000000\n",
"25% 2023-04-23 15:00:00 6.200000 59.000000\n",
"50% 2023-09-16 12:00:00 10.900000 74.000000\n",
"75% 2024-01-15 15:00:00 16.600000 87.000000\n",
"max 2024-05-15 18:00:00 34.300000 100.000000\n",
"std NaN 114.311663 122.156135"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
2024-05-20 10:19:55 +00:00
"source": [
"# Parse the timestamp column (format yyyymmddhh) into datetime values\n",
"data[\"MESS_DATUM\"] = pd.to_datetime(data[\"MESS_DATUM\"], format=\"%Y%m%d%H\")\n",
"\n",
"# Drop the columns the analysis does not use. Rebinding `data` with\n",
"# errors=\"ignore\" (instead of inplace=True) means a second run of this\n",
"# cell no longer raises KeyError for columns that are already removed.\n",
"data = data.drop(columns=[\"STATIONS_ID\", \"QN_4\", \"eor\"], errors=\"ignore\")\n",
"\n",
"# Show summary statistics of the reduced dataframe\n",
"data.describe()"
]
},
{
"cell_type": "code",
2024-05-27 13:38:21 +00:00
"execution_count": 4,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-27 13:38:21 +00:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAGGCAYAAABmGOKbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAACLZElEQVR4nO3dd3gUVdsG8HvTNoUkBNIoSSD0hEAgtNDBSMCIYomFVwQERAQLIAiiFFFRaWJBxFcE/fQVwYaCQGgCEkWQIiX0JiGhJ0Ag9Xx/hB12s7vJBnYzczb377pywc7Mbp7ZzJkzz5kz5+iEEAJEREREREREZHcuagdARERERERE5KyYdBMRERERERE5CJNuIiIiIiIiIgdh0k1ERERERETkIEy6iYiIiIiIiByESTcRERERERGRgzDpJiIiIiIiInIQJt1EREREREREDsKkm4iIiIiIiMhBmHQTERERkZQ2bNgAnU6HDRs22PVzdTodJk+ebNfPVMPKlSsRGxsLT09P6HQ6XL58udTt//rrL7Rv3x4+Pj7Q6XTYuXNnhcRpSdeuXdG0adM7+owBAwagTp06t/3eKlWq3NHvNzh+/Dh0Oh0WLlxol88j+TDpJtIYnU5n04+9LzBkMnfuXFZcRESSWbhwoUk95ubmhlq1amHAgAE4ffp0hcezYsUKzSXWOp0OI0aMsMtnXbhwAY888gi8vLzw0Ucf4csvv4SPjw/eeust/Pjjj2bb5+fnIzk5GRcvXsTs2bPx5ZdfIiIiwi6xOFJ6ejomT56sagPB7dDi8UeO46Z2AERk6ssvvzR5/cUXXyAlJcVseZMmTSoyLE2ZO3cuAgMDMWDAALVDISKicnr99ddRt25d3LhxA3/88QcWLlyIzZs3Y8+ePfD09KywOFasWIGPPvrIYuJz/fp1uLnJfZn8119/4cqVK5g6dSoSEhKU5W+99RYefvhh9OnTx2T7I0eO4MSJE/j0008xePDgCo729qWnp2PKlCmoU6cOYmNjTdZ9+umnKCoqUicwIxEREbh+/Trc3d2VZaUdf+R85D6bEDmhJ554wuT1H3/8gZSUFLPlzkIIgRs3bsDLy4txEBFVAr169UKrVq0AAIMHD0ZgYCDeeecdLFu2DI888ojK0RWryOTfUc6ePQsAqFq1qkO2l4FxkqsmnU7nFMcU3T52LyeSUFFREd577z1ER0fD09MTISEhGDp0KC5dumSyXZ06dXDvvfdiw4YNaNWqFby8vBATE6N0Tf/+++8RExMDT09PxMXFYceOHSbvNzzPdPToUSQmJsLHxwc1a9bE66+/DiHEHcW0atUqJaZPPvkEAPD555+je/fuCA4Ohl6vR1RUFD7++GOz9+/duxe//fab0kWxa9euAIDJkydDp9OZfV+GLo3Hjx+3KQ4iIqo4nTp1AlB8p9VYWloaHn74YVSrVg2enp5o1aoVli1bVubnbdq0CcnJyQgPD4der0dYWBhGjhyJ69evK9sMGDAAH330EQDTx7oMjJ/pXrp0KXQ6HX777Tez3/XJJ59Ap9Nhz549dxy3Law9w17ymeGuXbuif//+AIDWrVtDp9NhwIAB0Ol0uHbtGhYtWqTs84ABAzBgwAB06dIFAJCcnGxSt9q6T+WpgwHg119/RZcuXeDr6ws/Pz+0bt0aX3/9tdn79+3bh27dusHb2xu1atXCu+++a/J9tG7dGgAwcOBAZZ8M34OlZ7ovXLiAfv36wc/PD1WrVkX//v2xa9cum5653rlzJ4KCgtC1a1dcvXoVAHD69Gk89dRTCAkJgV6vR3R0NBYsWGDyvpJ/n7KOP3I+vNNNJKGhQ4di4cKFGDhwIJ5//nkcO3YMH374IXbs2IHff//dpGX38OHD6Nu3L4YOHYonnngCM2bMQO/evTFv3jy88sorePbZZwEA06ZNwyOPPIIDBw7AxeVWe1xhYSF69uyJdu3a4d1338XKlSsxadIkFBQU4PXXX7+tmA
4cOIDHH38cQ4cOxZAhQ9CoUSMAwMcff4zo6Gjcd999cHNzw88//4xnn30WRUVFGD58OADgvffew3PPPYcqVapgwoQJAICQkJDb+h6txUFERBXHkIwFBAQoy/bu3YsOHTqgVq1aGDduHHx8fPDtt9+iT58++O677/DAAw9Y/bwlS5YgJycHw4YNQ/Xq1bF161Z88MEH+Pfff7FkyRIAxXVWenq6xce3SkpKSkKVKlXw7bffKompweLFixEdHa0M+HUncdvThAkT0KhRI8yfP1/pzl+vXj0kJCRg8ODBaNOmDZ5++mkAQL169QAAtWrVwltvvYXnn38erVu3VupWR+zTwoUL8dRTTyE6Ohrjx49H1apVsWPHDqxcuRJ9+/ZVtrt06RJ69uyJBx98EI888giWLl2Kl19+GTExMejVqxeaNGmC119/HRMnTsTTTz+tNOC0b9/e4u8tKipC7969sXXrVgwbNgyNGzfGTz/9pDRQlOavv/5CYmIiWrVqhZ9++gleXl7IzMxEu3btlGfxg4KC8Ouvv2LQoEHIzs7Giy++aPGzynP8kZMQRKRpw4cPF8ZFddOmTQKA+Oqrr0y2W7lypdnyiIgIAUBs2bJFWbZq1SoBQHh5eYkTJ04oyz/55BMBQKxfv15Z1r9/fwFAPPfcc8qyoqIikZSUJDw8PMS5c+duO6aVK1ea7WtOTo7ZssTERBEZGWmyLDo6WnTp0sVs20mTJglLp7XPP/9cABDHjh2zKQ4iIrI/w7l4zZo14ty5c+LUqVNi6dKlIigoSOj1enHq1Cll27vuukvExMSIGzduKMuKiopE+/btRYMGDZRl69evN6u7LNUl06ZNEzqdzqTeK1m/GgMgJk2apLx+/PHHRXBwsCgoKFCWnTlzRri4uIjXX3+93HFbA0AMHz7c6npL+yuEEMeOHRMAxOeff64sM3zff/31l8m2Pj4+on///lY/e8mSJSbLbd0nW+vgy5cvC19fX9G2bVtx/fp1k22LioqU/3fp0kUAEF988YWyLDc3V4SGhoqHHnpIWfbXX3+Z7btB//79RUREhPL6u+++EwDEe++9pywrLCwU3bt3N/uM/v37Cx8fHyGEEJs3bxZ+fn4iKSnJ5HsYNGiQqFGjhjh//rzJ733ssceEv7+/cixa+vuUdvyR82H3ciLJLFmyBP7+/rj77rtx/vx55ScuLg5VqlTB+vXrTbaPiopCfHy88rpt27YAgO7duyM8PNxs+dGjR81+p/FIqobW3Ly8PKxZs+a2Yqpbty4SExPNfo/x89RZWVk4f/48unTpgqNHjyIrK8vm78hW1uIgIiLHSUhIQFBQEMLCwvDwww/Dx8cHy5YtQ+3atQEAFy9exLp16/DII4/gypUrSp1y4cIFJCYm4tChQ6WOdm5cl1y7dg3nz59H+/btIYQwe4zKVo8++ijOnj1r0q176dKlKCoqwqOPPmqXuLXIEfuUkpKCK1euYNy4cWbPOZfsYl2lShWTMW08PDzQpk0bi9cqtli5ciXc3d0xZMgQZZmLi4vSm86S9evXIzExEXfddRe+//576PV6AMVjwXz33Xfo3bs3hBAm1z+JiYnIysrC33//fVtxkvNh93IiyRw6dAhZWVkIDg62uN4wEIqBcWINAP7+/gCAsLAwi8tLPoPt4uKCyMhIk2UNGzYEcKtLYHljqlu3rsXtfv/9d0yaNAmpqanIyckxWZeVlaXEaC/W4iAiIsf56KOP0LBhQ2RlZWHBggXYuHGjksgAxY9FCSHw2muv4bXXXrP4GWfPnkWtWrUsrjt58iQmTpyIZcuWmdVpt9uA27NnT/j7+2Px4sW46667ABR3LY+NjVXqxDuNW4scsU+GZ/dtmYO7du3aZol4QEAAdu/ebfPvM3bixAnUqFED3t7eJsvr169vcfsbN24gKSkJcXFx+Pbbb01GtD937hwuX76M+fPnY/78+RbfX/L6hyovJt1EkikqKkJwcDC++uori+uDgoJMXr
u6ulrcztpyUWKANEfEZGmE8CNHjuCuu+5C48aNMWvWLISFhcHDwwMrVqzA7NmzbZryw9ogJIWFhRaXc6RyIqKK16Z
"text/plain": [
"<Figure size 1000x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-05-20 10:19:55 +00:00
"source": [
"# Plot the raw data (temperature and relative humidity) side by side\n",
"fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
"panels = [(\"TT_TER\", \"Temperatur\"), (\"RF_TER\", \"Relative Luftfeuchtigkeit\")]\n",
"\n",
"for ax, (column, title) in zip(axes, panels):\n",
"    ax.plot(data[\"MESS_DATUM\"], data[column])\n",
"    ax.set_title(title)\n",
"    # Each axis gets its own formatter; date ticks are labelled month-year\n",
"    ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%m-%Y\"))\n",
"    # Vertical tick labels so neighbouring dates do not overlap\n",
"    ax.tick_params(axis=\"x\", labelrotation=90)\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-05-27 13:38:21 +00:00
"execution_count": 5,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-27 13:38:21 +00:00
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>MESS_DATUM</th>\n",
" <th>TT_TER</th>\n",
" <th>RF_TER</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>1939</td>\n",
" <td>1939.000000</td>\n",
" <td>1939.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>2023-09-03 09:01:23.548220672</td>\n",
" <td>11.634683</td>\n",
" <td>72.296029</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>2022-11-13 06:00:00</td>\n",
" <td>-9.100000</td>\n",
" <td>20.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>2023-04-23 15:00:00</td>\n",
" <td>6.500000</td>\n",
" <td>60.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>2023-09-16 12:00:00</td>\n",
" <td>11.100000</td>\n",
" <td>75.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>2024-01-15 15:00:00</td>\n",
" <td>16.700000</td>\n",
" <td>87.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2024-05-15 18:00:00</td>\n",
" <td>34.300000</td>\n",
" <td>100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>NaN</td>\n",
" <td>7.940810</td>\n",
" <td>17.596512</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" MESS_DATUM TT_TER RF_TER\n",
"count 1939 1939.000000 1939.000000\n",
"mean 2023-09-03 09:01:23.548220672 11.634683 72.296029\n",
"min 2022-11-13 06:00:00 -9.100000 20.000000\n",
"25% 2023-04-23 15:00:00 6.500000 60.000000\n",
"50% 2023-09-16 12:00:00 11.100000 75.000000\n",
"75% 2024-01-15 15:00:00 16.700000 87.000000\n",
"max 2024-05-15 18:00:00 34.300000 100.000000\n",
"std NaN 7.940810 17.596512"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
2024-05-20 10:19:55 +00:00
"source": [
"# Turn the error marker -999 into NaN so pandas treats it as missing\n",
"data = data.replace(-999, np.nan)\n",
"\n",
"# Fill the missing measurements by interpolating each series\n",
"for column in (\"TT_TER\", \"RF_TER\"):\n",
"    data[column] = data[column].interpolate()\n",
"\n",
"# Show summary statistics of the cleaned dataframe\n",
"data.describe()"
]
},
{
"cell_type": "code",
2024-05-27 13:38:21 +00:00
"execution_count": 6,
2024-05-26 12:06:52 +00:00
"metadata": {},
2024-05-27 13:38:21 +00:00
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA90AAAGGCAYAAABmGOKbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy80BEi2AAAACXBIWXMAAA9hAAAPYQGoP6dpAAD6vUlEQVR4nOydd5zU1NrHf9m+bKXuUhZYepEmvShtaYKCoChW7FdRL3LVK/dV9CKKclWwIFjBXkAERAUB6b333mHZpW7vM3n/mJ3ZJHOSnLSZzOz5+uHjTnJycpKc9pznOc/D8TzPg8FgMBgMBoPBYDAYDIbphPi7AAwGg8FgMBgMBoPBYAQrTOhmMBgMBoPBYDAYDAbDIpjQzWAwGAwGg8FgMBgMhkUwoZvBYDAYDAaDwWAwGAyLYEI3g8FgMBgMBoPBYDAYFsGEbgaDwWAwGAwGg8FgMCyCCd0MBoPBYDAYDAaDwWBYBBO6GQwGg8FgMBgMBoPBsAgmdDMYDAaDwWAwGAwGg2ERTOhmMBgMBoPBYNiO1atXg+M4rF692tR8OY7Da6+9Zmqe/mDp0qVo3749oqKiwHEcsrKyFNNv27YNPXr0QExMDDiOw+7du31SThJ9+vTBDTfcYCiPsWPHomHDhrqvjY2NNXR/N6dPnwbHcZg7d64p+TGCEyZ0Mxg+huM4qn9mTzICiY8//pgNXgwGgxFAzJ07VzSGhYWFoW7duhg7diwuXLjg8/L88ccfthOsOY7D008/bUpeV69exejRoxEdHY2ZM2fim2++QUxMDN58800sXLjQK31paSnuvPNOXLt2DdOnT8c333yDBg0amFIWK0lPT8drr73m1wUCPdix/jH8S5i/C8BgVDa++eYb0e+vv/4ay5cv9zresmVLXxbLVnz88ceoUaMGxo4d6++iMBgMBkMDkydPRmpqKoqKirB582bMnTsX69evx/79+xEVFeWzcvzxxx+YOXMmUfApLCxEWFhgT4G3bduG3NxcvP7660hLS/Mcf/PNN3HHHXdgxIgRovQnTpzAmTNn8Nlnn+HRRx/1cWn1k56ejv/+979o2LAh2rdvLzr32Wefwel0+qdgAho0aIDCwkKEh4d7jinVP0blJLB7HAYjALnvvvtEvzdv3ozly5d7HQ8WeJ5HUVERoqOjWTkYDAYjyBkyZAg6deoEAHj00UdRo0YNvP3221i8eDFGjx7t59K58KXwbxWXLl0CACQmJlqSPhAQCrn+hOO4oKhTDGth5uUMhg1xOp2YMWMGWrdujaioKCQlJeGJJ57A9evXRekaNmyIYcOGYfXq1ejUqROio6PRpk0bj2n6ggUL0KZNG0RFRaFjx47YtWuX6Hr3nqaTJ09i0KBBiImJQZ06dTB58mTwPG+oTMuWLfOU6ZNPPgEAzJkzB/369UOtWrUQGRmJVq1aYdasWV7XHzhwAGvWrPGYKfbp0wcA8Nprr4HjOK/35TZrPH36NFU5GAwGg+EbbrrpJgAuTauQw4cP44477kC1atUQFRWFTp06YfHixar5rVu3DnfeeSfq16+PyMhIpKSk4LnnnkNhYaEnzdixYzFz5kwA4i1dboR7uufPnw+O47BmzRqve33yySfgOA779+83XG4a5PawS/cM9+nTBw8++CAAoHPnzuA4DmPHjgXHccjPz8dXX33leeaxY8di7Nix6N27NwDgzjvvFI2rtM+kZfwFgD///BO9e/dGXFwc4uPj0blzZ3z//fde1x88eBB9+/ZFlSpVULduXUybNk30Pjp37gwAeOihhzzP5H4PpD3dV69exf3334/4+HgkJibiwQcfxJ49e6j2XO/evRs1a9ZEnz59kJeXBwC4cOECHn74YSQlJSEyMhKtW7fGl19+KbpO+n3U6h+jcsI03QyGDXniiScwd+5cPPTQQ3j22Wdx6tQpfPTRR9i1axc2bNggWt09fvw47rnnHjzxxBO477778M477+DWW2/F7Nmz8Z
///AdPPfUUAGDq1KkYPXo0jhw5gpCQivU2h8OBwYMHo1u3bpg2bRqWLl2KV199FWVlZZg8ebKuMh05cgRjxozBE088gcceewzNmzcHAMyaNQutW7fGbbfdhrCwMPz222946qmn4HQ6MW7cOADAjBkz8MwzzyA2Nhb/93//BwBISkrS9R7lysFgMBgM3+AWxqpWreo5duDAAfTs2RN169bFSy+9hJiYGPz8888YMWIEfvnlF9x+++2y+c2bNw8FBQV48sknUb16dWzduhUffvghzp8/j3nz5gFwjVfp6enErVtShg4ditjYWPz8888ewdTNTz/9hNatW3scfhkpt5n83//9H5o3b45PP/3UY87fuHFjpKWl4dFHH0WXLl3w+OOPAwAaN24MAKhbty7efPNNPPvss+jcubNnXLXimebOnYuHH34YrVu3xsSJE5GYmIhdu3Zh6dKluOeeezzprl+/jsGDB2PkyJEYPXo05s+fj3//+99o06YNhgwZgpYtW2Ly5MmYNGkSHn/8cc8CTo8ePYj3dTqduPXWW7F161Y8+eSTaNGiBRYtWuRZoFBi27ZtGDRoEDp16oRFixYhOjoamZmZ6Natm2cvfs2aNfHnn3/ikUceQU5ODsaPH0/MS0v9Y1QieAaD4VfGjRvHC5viunXreAD8d999J0q3dOlSr+MNGjTgAfAbN270HFu2bBkPgI+OjubPnDnjOf7JJ5/wAPhVq1Z5jj344IM8AP6ZZ57xHHM6nfzQoUP5iIgI/vLly7rLtHTpUq9nLSgo8Do2aNAgvlGjRqJjrVu35nv37u2V9tVXX+VJ3dacOXN4APypU6eoysFgMBgMc3H3wytWrOAvX77Mnzt3jp8/fz5fs2ZNPjIykj937pwnbf/+/fk2bdrwRUVFnmNOp5Pv0aMH37RpU8+xVatWeY1bpHFk6tSpPMdxojFPOrYKAcC/+uqrnt9jxozha9WqxZeVlXmOXbx4kQ8JCeEnT56sudxyAODHjRsne570vDzP86dOneIB8HPmzPEcc7/vbdu2idLGxMTwDz74oGze8+bNEx2nfSba8TcrK4uPi4vju3btyhcWForSOp1Oz9+9e/fmAfBff/2151hxcTGfnJzMjxo1ynNs27ZtXs/u5sEHH+QbNGjg+f3LL7/wAPgZM2Z4jjkcDr5fv35eeTz44IN8TEwMz/M8v379ej4+Pp4fOnSo6D088sgjfO3atfkrV66I7nv33XfzCQkJnrpI+j5K9Y9ROWHm5QyGzZg3bx4SEhIwYMAAXLlyxfOvY8eOiI2NxapVq0TpW7Vqhe7du3t+d+3aFQDQr18/1K9f3+v4yZMnve4p9KbqXtEtKSnBihUrdJUpNTUVgwYN8rqPcD91dnY2rly5gt69e+PkyZPIzs6mfke0yJWDwWAwGNaQlpaGmjVrIiUlBXfccQdiYmKwePFi1KtXDwBw7do1/P333xg9ejRyc3M948nVq1cxaNAgHDt2TNHbuXAcyc/Px5UrV9CjRw/wPO+1hYqWu+66C5cuXRKZdc+fPx9OpxN33XWXKeW2I1Y80/Lly5Gbm4uXXnrJa5+z1MQ6NjZW5M8mIiICXbp0Ic5TaFi6dCnCw8Px2GOPeY6FhIR4LOlIrFq1CoMGDUL//v2xYMECREZGAnD5gfnll19w6623gud50dxn0KBByM7Oxs6dO3WVk1E5YeblDIbNOHbsGLKzs1GrVi3iebczFDdCwRoAEhISAAApKSnE49I92CEhIWjUqJHoWLNmzQBUmAVqLVNqaiox3YYNG/Dqq69i06ZNKCgoEJ3Lzs72lNEs5MrBYDAYDGuYOXMmmjVrhuzsbHz55ZdYu3atR5ABXFuieJ7HK6+8gldeeYWYx6VLl1C3bl3iubNnz2LSpElYvHix13imd/F28ODBSEhIwE8//YT+/fsDcJmWt2/f3jMeGi23HbHimdx792licNerV89LEK9atSr27t1LfT8hZ86cQe3atVGlSh
XR8SZNmhDTFxUVYejQoejYsSN+/vlnkUf7y5cvIysrC59++ik+/fRT4vXSuQ+DoQQTuhkMm+F0OlGrVi189913xPM
"text/plain": [
"<Figure size 1000x400 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
2024-05-26 12:06:52 +00:00
"source": [
"# Plot the cleaned data (temperature and relative humidity) side by side\n",
"fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
"panels = [(\"TT_TER\", \"Temperatur\"), (\"RF_TER\", \"Relative Luftfeuchtigkeit\")]\n",
"\n",
"for ax, (column, title) in zip(axes, panels):\n",
"    ax.plot(data[\"MESS_DATUM\"], data[column])\n",
"    ax.set_title(title)\n",
"    # Each axis gets its own formatter; date ticks are labelled month-year\n",
"    ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%m-%Y\"))\n",
"    # Vertical tick labels so neighbouring dates do not overlap\n",
"    ax.tick_params(axis=\"x\", labelrotation=90)\n",
"\n",
"fig.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-05-26 12:06:52 +00:00
"version": "3.11.0"
2024-05-20 10:19:55 +00:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}