{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Musterbeispiel Data Processing\n",
    "\n",
    "Dieses Notebook liest Wetterdaten aus einer CSV-Datei ein, bereinigt Fehlerwerte und stellt Temperatur und relative Luftfeuchtigkeit vor und nach der Bereinigung grafisch dar.\n",
    "\n",
    "Notwendige Packages befinden sich in `requirements.txt` - installieren mit `pip install -r requirements.txt`.\n",
    "Die Verwendung eines Virtual Environments (venv) wird empfohlen (`py -m venv .venv` bzw. `python3 -m venv .venv`, dann `.venv\\Scripts\\activate` unter Windows bzw. `source .venv/bin/activate` unter Linux/macOS)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pathlib\n",
    "\n",
    "import matplotlib.dates as mdates\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: values a reader might want to adjust\n",
    "\n",
    "# Value that marks erroneous measurements in the data; it is replaced by NaN during cleaning\n",
    "MISSING_VALUE_MARKER = -999\n",
    "\n",
    "# Columns that are not needed for this analysis and are removed after loading\n",
    "UNUSED_COLUMNS = [\"STATIONS_ID\", \"QN_4\", \"eor\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_temperature_humidity(frame: pd.DataFrame, title: str | None = None) -> plt.Figure:\n",
    "    \"\"\"Plot temperature and relative humidity over time in two side-by-side panels.\n",
    "\n",
    "    Args:\n",
    "        frame: DataFrame providing the columns ``MESS_DATUM`` (x axis),\n",
    "            ``TT_TER`` (left panel) and ``RF_TER`` (right panel).\n",
    "        title: Optional overall figure title; omitted when ``None``.\n",
    "\n",
    "    Returns:\n",
    "        The created matplotlib figure.\n",
    "    \"\"\"\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(10, 4))\n",
    "    panels = [(\"TT_TER\", \"Temperatur\"), (\"RF_TER\", \"Relative Luftfeuchtigkeit\")]\n",
    "    for ax, (column, panel_title) in zip(axes, panels):\n",
    "        ax.plot(frame[\"MESS_DATUM\"], frame[column])\n",
    "        ax.set_title(panel_title)\n",
    "        # Label the x axis as month-year and rotate the labels so they do not overlap\n",
    "        ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%m-%Y\"))\n",
    "        ax.tick_params(axis=\"x\", labelrotation=90)\n",
    "    if title is not None:\n",
    "        fig.suptitle(title)\n",
    "    fig.tight_layout()\n",
    "    return fig"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Daten einlesen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the weather data. The file must be located in the same folder as this notebook\n",
    "datafile = pathlib.Path() / \"wetterdaten_freiburg_2022_2024.csv\"\n",
    "data_raw = pd.read_csv(datafile, sep=\";\")\n",
    "\n",
    "# Show summary statistics of the raw data\n",
    "data_raw.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Daten aufbereiten"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a new frame from the raw data instead of mutating it in place, so this cell\n",
    "# can be re-run without a KeyError caused by columns that were already dropped\n",
    "data_prepared = data_raw.drop(columns=UNUSED_COLUMNS).assign(\n",
    "    # Convert the date values (format: year, month, day, hour) to datetimes\n",
    "    MESS_DATUM=lambda df: pd.to_datetime(df[\"MESS_DATUM\"], format=\"%Y%m%d%H\")\n",
    ")\n",
    "\n",
    "# Show summary statistics of the prepared data\n",
    "data_prepared.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rohdaten visualisieren"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the data before cleaning (temperature and relative humidity)\n",
    "plot_temperature_humidity(data_prepared, title=\"Rohdaten\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fehlerwerte bereinigen"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "data = (\n",
    "    data_prepared\n",
    "    # Unify error values: turn the marker into NaN\n",
    "    .replace(MISSING_VALUE_MARKER, np.nan)\n",
    "    # Fill the missing values by interpolation\n",
    "    .assign(\n",
    "        TT_TER=lambda df: df[\"TT_TER\"].interpolate(),\n",
    "        RF_TER=lambda df: df[\"RF_TER\"].interpolate(),\n",
    "    )\n",
    ")\n",
    "\n",
    "# Sanity check: the error marker must no longer occur in the measurement columns\n",
    "assert not (data[[\"TT_TER\", \"RF_TER\"]] == MISSING_VALUE_MARKER).any().any()\n",
    "\n",
    "# Show summary statistics of the cleaned data\n",
    "data.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bereinigte Daten visualisieren"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the cleaned data (temperature and relative humidity)\n",
    "plot_temperature_humidity(data, title=\"Bereinigte Daten\")\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}