data-processing-example/data-processing.ipynb

149 lines
4.1 KiB
Text
Raw Normal View History

2024-05-20 10:19:55 +00:00
{
"cells": [
2024-05-26 12:06:52 +00:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Musterbeispiel Data Processing\n",
"\n",
"Notwendige Packages befinden sich in `requirements.txt` - installieren mit `pip install -r requirements.txt`.\n",
2024-05-27 07:08:01 +00:00
"\n",
2024-05-26 12:06:52 +00:00
"Die Verwendung eines Virtual Environments (venv) wird empfohlen (`py -m venv .venv`, dann `.venv/scripts/activate` bzw. `.venv/bin/activate`)"
]
},
2024-05-20 10:19:55 +00:00
{
"cell_type": "code",
2024-05-26 12:06:52 +00:00
"execution_count": 27,
2024-05-20 10:19:55 +00:00
"metadata": {},
"outputs": [],
"source": [
"import pathlib\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"import numpy as np"
]
},
{
"cell_type": "code",
2024-05-26 12:06:52 +00:00
"execution_count": null,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-26 12:06:52 +00:00
"outputs": [],
2024-05-20 10:19:55 +00:00
"source": [
2024-05-26 12:06:52 +00:00
"# Einlesen der Wetterdaten. Diese müssen sich im selben ordner wie dieses Notebook befinden\n",
2024-05-20 10:19:55 +00:00
"datafile = pathlib.Path() / \"wetterdaten_freiburg_2022_2024.csv\"\n",
"data = pd.read_csv(datafile, sep=\";\")\n",
2024-05-26 12:06:52 +00:00
"\n",
"# Struktur des pandas dataframes ausgeben\n",
2024-05-20 10:19:55 +00:00
"data.describe()"
]
},
{
"cell_type": "code",
2024-05-26 12:06:52 +00:00
"execution_count": null,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-26 12:06:52 +00:00
"outputs": [],
2024-05-20 10:19:55 +00:00
"source": [
2024-05-26 12:06:52 +00:00
"# Datumswerte konvertieren\n",
2024-05-20 10:19:55 +00:00
"data[\"MESS_DATUM\"] = pd.to_datetime(data[\"MESS_DATUM\"], format=\"%Y%m%d%H\")\n",
2024-05-26 12:06:52 +00:00
"\n",
"# Nicht benötigte Spalten entfernen\n",
"data.drop([\"STATIONS_ID\", \"QN_4\", \"eor\"], axis=1, inplace=True)\n",
"\n",
"# Struktur des pandas dataframes ausgeben\n",
2024-05-20 10:19:55 +00:00
"data.describe()"
]
},
{
"cell_type": "code",
2024-05-26 12:06:52 +00:00
"execution_count": null,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-26 12:06:52 +00:00
"outputs": [],
2024-05-20 10:19:55 +00:00
"source": [
2024-05-26 12:06:52 +00:00
"# Daten plotten (Temperatur und Luftfeuchtigkeit)\n",
2024-05-20 10:19:55 +00:00
"fig = plt.figure(figsize=(10, 4))\n",
"ax = plt.subplot(1, 2, 1)\n",
"ax.plot(data[\"MESS_DATUM\"], data[\"TT_TER\"])\n",
"ax.set_title(\"Temperatur\")\n",
"monthyearFmt = mdates.DateFormatter('%m-%Y')\n",
"ax.xaxis.set_major_formatter(monthyearFmt)\n",
"_ = plt.xticks(rotation=90)\n",
"\n",
"ax = plt.subplot(1, 2, 2)\n",
"ax.plot(data[\"MESS_DATUM\"], data[\"RF_TER\"])\n",
"ax.set_title(\"Relative Luftfeuchtigkeit\")\n",
"monthyearFmt = mdates.DateFormatter('%m-%Y')\n",
"ax.xaxis.set_major_formatter(monthyearFmt)\n",
"_ = plt.xticks(rotation=90)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-05-26 12:06:52 +00:00
"execution_count": null,
2024-05-20 10:19:55 +00:00
"metadata": {},
2024-05-26 12:06:52 +00:00
"outputs": [],
2024-05-20 10:19:55 +00:00
"source": [
"# Fehlerwerte vereinheitlichen\n",
"data.replace(-999, np.nan, inplace=True)\n",
"\n",
"# Fehlende Werte durch Interpolation ersetzen\n",
"data[\"TT_TER\"] = data[\"TT_TER\"].interpolate()\n",
"data[\"RF_TER\"] = data[\"RF_TER\"].interpolate()\n",
"\n",
2024-05-26 12:06:52 +00:00
"# Struktur des pandas dataframes ausgeben\n",
"data.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Überarbeitete Daten plotten (Temperatur und Luftfeuchtigkeit)\n",
2024-05-20 10:19:55 +00:00
"fig = plt.figure(figsize=(10, 4))\n",
"ax = plt.subplot(1, 2, 1)\n",
"ax.plot(data[\"MESS_DATUM\"], data[\"TT_TER\"])\n",
"ax.set_title(\"Temperatur\")\n",
"monthyearFmt = mdates.DateFormatter('%m-%Y')\n",
"ax.xaxis.set_major_formatter(monthyearFmt)\n",
"_ = plt.xticks(rotation=90)\n",
"\n",
"ax = plt.subplot(1, 2, 2)\n",
"ax.plot(data[\"MESS_DATUM\"], data[\"RF_TER\"])\n",
"ax.set_title(\"Relative Luftfeuchtigkeit\")\n",
"monthyearFmt = mdates.DateFormatter('%m-%Y')\n",
"ax.xaxis.set_major_formatter(monthyearFmt)\n",
"_ = plt.xticks(rotation=90)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-05-26 12:06:52 +00:00
"version": "3.11.0"
2024-05-20 10:19:55 +00:00
}
},
"nbformat": 4,
"nbformat_minor": 2
}