2024-05-20 10:19:55 +00:00
|
|
|
{
|
|
|
|
"cells": [
|
2024-05-26 12:06:52 +00:00
|
|
|
{
|
|
|
|
"cell_type": "markdown",
|
|
|
|
"metadata": {},
|
|
|
|
"source": [
|
|
|
|
"# Musterbeispiel Data Processing\n",
|
|
|
|
"\n",
|
|
|
|
"Notwendige Packages befinden sich in `requirements.txt` - installieren mit `pip install -r requirements.txt`.\n",
|
2024-05-27 07:08:01 +00:00
|
|
|
"\n",
|
2024-05-26 12:06:52 +00:00
|
|
"Die Verwendung eines Virtual Environments (venv) wird empfohlen (`py -m venv .venv`, dann unter Windows `.venv\\Scripts\\activate` bzw. unter Linux/macOS `source .venv/bin/activate`)."
|
|
|
|
]
|
|
|
|
},
|
2024-05-20 10:19:55 +00:00
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-05-26 12:06:52 +00:00
|
|
|
"execution_count": 27,
|
2024-05-20 10:19:55 +00:00
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import pathlib\n",
|
|
|
|
"import pandas as pd\n",
|
|
|
|
"import matplotlib.pyplot as plt\n",
|
|
|
|
"import matplotlib.dates as mdates\n",
|
|
|
|
"import numpy as np"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-05-26 12:06:52 +00:00
|
|
|
"execution_count": null,
|
2024-05-20 10:19:55 +00:00
|
|
|
"metadata": {},
|
2024-05-26 12:06:52 +00:00
|
|
|
"outputs": [],
|
2024-05-20 10:19:55 +00:00
|
|
|
"source": [
|
2024-05-26 12:06:52 +00:00
|
|
"# Load the weather data. The relative path is resolved against the current\n",
"# working directory, so the CSV has to sit in the same folder as this notebook.\n",
"datafile = pathlib.Path(\"wetterdaten_freiburg_2022_2024.csv\")\n",
"data = pd.read_csv(datafile, sep=\";\")\n",
"\n",
"# Display summary statistics (describe) of the loaded DataFrame\n",
"data.describe()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-05-26 12:06:52 +00:00
|
|
|
"execution_count": null,
|
2024-05-20 10:19:55 +00:00
|
|
|
"metadata": {},
|
2024-05-26 12:06:52 +00:00
|
|
|
"outputs": [],
|
2024-05-20 10:19:55 +00:00
|
|
|
"source": [
|
2024-05-26 12:06:52 +00:00
|
|
"# Convert the MESS_DATUM values (written as YYYYMMDDHH) into datetime values\n",
"data[\"MESS_DATUM\"] = pd.to_datetime(data[\"MESS_DATUM\"], format=\"%Y%m%d%H\")\n",
"\n",
"# Remove the columns that are not needed. Rebinding the result (instead of\n",
"# inplace=True) together with errors=\"ignore\" keeps the drop from raising a\n",
"# KeyError when this cell is executed again after the columns are already gone.\n",
"data = data.drop(columns=[\"STATIONS_ID\", \"QN_4\", \"eor\"], errors=\"ignore\")\n",
"\n",
"# Display summary statistics (describe) of the reduced DataFrame\n",
"data.describe()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-05-26 12:06:52 +00:00
|
|
|
"execution_count": null,
|
2024-05-20 10:19:55 +00:00
|
|
|
"metadata": {},
|
2024-05-26 12:06:52 +00:00
|
|
|
"outputs": [],
|
2024-05-20 10:19:55 +00:00
|
|
|
"source": [
|
2024-05-26 12:06:52 +00:00
|
|
"# Plot the data (temperature and relative humidity) in two side-by-side panels\n",
"fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
"\n",
"# (column to plot, panel title) for each panel, left to right\n",
"panels = [(\"TT_TER\", \"Temperatur\"), (\"RF_TER\", \"Relative Luftfeuchtigkeit\")]\n",
"\n",
"for ax, (column, title) in zip(axes, panels):\n",
"    ax.plot(data[\"MESS_DATUM\"], data[column])\n",
"    ax.set_title(title)\n",
"    # Every axis gets its own formatter instance; tick labels read month-year\n",
"    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))\n",
"    # Rotate the labels on the axes object itself rather than on pyplot's\n",
"    # implicit current axes\n",
"    ax.tick_params(axis=\"x\", labelrotation=90)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-05-26 12:06:52 +00:00
|
|
|
"execution_count": null,
|
2024-05-20 10:19:55 +00:00
|
|
|
"metadata": {},
|
2024-05-26 12:06:52 +00:00
|
|
|
"outputs": [],
|
2024-05-20 10:19:55 +00:00
|
|
|
"source": [
|
|
|
"# Unify the error values: every -999 entry becomes NaN\n",
"data = data.replace(-999, np.nan)\n",
"\n",
"# Replace the missing values in both measurement columns by interpolation\n",
"for column in (\"TT_TER\", \"RF_TER\"):\n",
"    data[column] = data[column].interpolate()\n",
"\n",
"# Display summary statistics (describe) of the cleaned DataFrame\n",
"data.describe()"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": null,
|
|
|
|
"metadata": {},
|
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
"# Plot the revised data (temperature and relative humidity) in two side-by-side panels\n",
"fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
"\n",
"# (column to plot, panel title) for each panel, left to right\n",
"panels = [(\"TT_TER\", \"Temperatur\"), (\"RF_TER\", \"Relative Luftfeuchtigkeit\")]\n",
"\n",
"for ax, (column, title) in zip(axes, panels):\n",
"    ax.plot(data[\"MESS_DATUM\"], data[column])\n",
"    ax.set_title(title)\n",
"    # Every axis gets its own formatter instance; tick labels read month-year\n",
"    ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%Y'))\n",
"    # Rotate the labels on the axes object itself rather than on pyplot's\n",
"    # implicit current axes\n",
"    ax.tick_params(axis=\"x\", labelrotation=90)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": ".venv",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
2024-05-26 12:06:52 +00:00
|
|
|
"version": "3.11.0"
|
2024-05-20 10:19:55 +00:00
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
|
|
|
}
|