Skip to content

Commit a12c08b

Browse files
committed
apply some suggestions, including making plotting more efficient.
1 parent 3ff6294 commit a12c08b

1 file changed

Lines changed: 41 additions & 61 deletions

File tree

demos/WaterData_demo.ipynb

Lines changed: 41 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,19 @@
9090
"Let's get into some examples using the functions listed above. First, we need to load the `waterdata` module and a few other packages and functions to go through the examples. To run the entirety of this notebook, you will need to install the `dataretrieval`, `matplotlib`, and `geopandas` packages. `matplotlib` is needed to create the plots, and `geopandas` is needed to create the interactive maps."
9191
]
9292
},
93+
{
94+
"cell_type": "code",
95+
"execution_count": null,
96+
"id": "cd626a14",
97+
"metadata": {},
98+
"outputs": [],
99+
"source": [
100+
"# Install necessary packages to run notebook\n",
101+
"!pip install dataretrieval\n",
102+
"!pip install matplotlib\n",
103+
"!pip install geopandas"
104+
]
105+
},
93106
{
94107
"cell_type": "code",
95108
"execution_count": null,
@@ -105,14 +118,7 @@
105118
"from datetime import datetime, timedelta\n",
106119
"from datetime import date\n",
107120
"from dateutil.relativedelta import relativedelta\n",
108-
"from dataretrieval import waterdata\n",
109-
"\n",
110-
"# Check if geopandas is installed\n",
111-
"import importlib.util\n",
112-
"if importlib.util.find_spec(\"geopandas\") is None:\n",
113-
" GEOPANDAS=False\n",
114-
"else:\n",
115-
" GEOPANDAS=True"
121+
"from dataretrieval import waterdata"
116122
]
117123
},
118124
{
@@ -302,8 +308,7 @@
302308
"metadata": {},
303309
"outputs": [],
304310
"source": [
305-
"if GEOPANDAS:\n",
306-
" NE_locations_discharge.set_crs(crs=\"WGS84\").explore()"
311+
"NE_locations_discharge.set_crs(crs=\"WGS84\").explore()"
307312
]
308313
},
309314
{
@@ -347,9 +352,8 @@
347352
"metadata": {},
348353
"outputs": [],
349354
"source": [
350-
"if GEOPANDAS:\n",
351-
" latest_dv['date'] = latest_dv['time'].astype(str)\n",
352-
" latest_dv[['geometry', 'monitoring_location_id', 'date', 'value', 'unit_of_measure']].set_crs(crs=\"WGS84\").explore(column='value', tiles='CartoDB dark matter', cmap='YlOrRd', scheme=None, legend=True)"
355+
"latest_dv['date'] = latest_dv['time'].astype(str)\n",
356+
"latest_dv[['geometry', 'monitoring_location_id', 'date', 'value', 'unit_of_measure']].set_crs(crs=\"WGS84\").explore(column='value', tiles='CartoDB dark matter', cmap='YlOrRd', scheme=None, legend=True)"
353357
]
354358
},
355359
{
@@ -373,10 +377,8 @@
373377
" statistic_id=\"00011\"\n",
374378
")\n",
375379
"\n",
376-
"if GEOPANDAS:\n",
377-
" latest_instantaneous['datetime'] = latest_instantaneous['time'].astype(str)\n",
378-
"\n",
379-
" latest_instantaneous[['geometry', 'monitoring_location_id', 'datetime', 'value', 'unit_of_measure']].set_crs(crs=\"WGS84\").explore(column='value', cmap='YlOrRd', scheme=None, legend=True)"
380+
"latest_instantaneous['datetime'] = latest_instantaneous['time'].astype(str)\n",
381+
"latest_instantaneous[['geometry', 'monitoring_location_id', 'datetime', 'value', 'unit_of_measure']].set_crs(crs=\"WGS84\").explore(column='value', cmap='YlOrRd', scheme=None, legend=True)"
380382
]
381383
},
382384
{
@@ -413,7 +415,7 @@
413415
"id": "c5c5881e",
414416
"metadata": {},
415417
"source": [
416-
"Currently, users may only request 3 years or less of continuous data in one pull. For this example, let's pull the last 1 year of daily mean values and instantaneous values for these Missouri River sites. We'll skip pulling geometry in the `waterdata.get_daily()` function; the `waterdata.get_continuous()` function does not return geometry at all."
418+
"Currently, users may only request 3 years or less of continuous data in one pull. For this example, let's pull the last 1 year of daily mean values and instantaneous values for these Missouri River sites. We'll skip pulling geometry in the `waterdata.get_daily()` function; the `waterdata.get_continuous()` function does not return geometry at all, which keeps the size of the returned dataset down."
417419
]
418420
},
419421
{
@@ -459,6 +461,14 @@
459461
")"
460462
]
461463
},
464+
{
465+
"cell_type": "markdown",
466+
"id": "c1663311",
467+
"metadata": {},
468+
"source": [
469+
"With these two datasets, let's plot daily and instantaneous discharge values for the four Missouri River sites using `matplotlib`. We will plot each site on a different subplot, with instantaneous values represented by a blue line and daily mean values represented by black points."
470+
]
471+
},
462472
{
463473
"cell_type": "code",
464474
"execution_count": null,
@@ -540,65 +550,35 @@
540550
"id": "e621e45a",
541551
"metadata": {},
542552
"source": [
543-
"Hey! We have some! Let's add these to our plots."
553+
"Hey! We have some! Let's add these to our plots from above. We'll loop through each monitoring location plot and add in field measurements as red points."
544554
]
545555
},
546556
{
547557
"cell_type": "code",
548558
"execution_count": null,
549-
"id": "87a397a4",
559+
"id": "42f22d69",
550560
"metadata": {},
551561
"outputs": [],
552562
"source": [
553-
"fig, axes = plt.subplots(2, 2, figsize=(14, 8), dpi=150, sharex=False, sharey=True)\n",
554-
"axes = axes.ravel()\n",
555-
"\n",
556-
"# Y-axis formatter (with thousands separators)\n",
557-
"tick_fmt = mtick.StrMethodFormatter('{x:,.0f}')\n",
563+
"for ax, site in zip(axes, missouri_site_ids):\n",
564+
" field = field_measurements.loc[\n",
565+
" field_measurements['monitoring_location_id'] == site, [\"time\", \"value\"]\n",
566+
" ].sort_values(\"time\")\n",
558567
"\n",
559-
"for ax, site, site_name in zip(axes, missouri_site_ids, missouri_site_names):\n",
560-
" # Filter per site & sort by time\n",
561-
" inst = instantaneous_values.loc[instantaneous_values['monitoring_location_id'] == site, [\"time\", \"value\"]].sort_values(\"time\")\n",
562-
" daily = daily_values.loc[daily_values['monitoring_location_id'] == site, [\"time\", \"value\"]].sort_values(\"time\")\n",
563-
" field = field_measurements.loc[field_measurements['monitoring_location_id'] == site, [\"time\", \"value\"]].sort_values(\"time\")\n",
564-
"\n",
565-
" # Instantaneous (line)\n",
566-
" ax.plot(\n",
567-
" inst[\"time\"], inst[\"value\"],\n",
568-
" color=\"#1f77b4\", lw=1.0, label=\"Instantaneous\", zorder=1\n",
569-
" )\n",
570-
"\n",
571-
" # Daily mean (black dots)\n",
572-
" ax.scatter(\n",
573-
" daily[\"time\"], daily[\"value\"],\n",
574-
" c=\"black\", s=2, label=\"Daily mean\", zorder=2\n",
575-
" )\n",
576-
"\n",
577-
" # Field measurements (red dots)\n",
578568
" ax.scatter(\n",
579569
" field[\"time\"], field[\"value\"],\n",
580570
" c=\"red\", s=4, label=\"Field\", zorder=3\n",
581571
" )\n",
582572
"\n",
583-
" # Axes styling\n",
584-
" ax.set_title(f\"{site}\\n{site_name}\", fontsize=10)\n",
585-
" ax.grid(True, which=\"both\", alpha=0.25)\n",
586-
" ax.yaxis.set_major_formatter(tick_fmt)\n",
587-
"\n",
588-
" # Time ticks\n",
589-
" ax.xaxis.set_major_locator(mdates.MonthLocator())\n",
590-
" ax.xaxis.set_major_formatter(mdates.DateFormatter(\"%b %Y\"))\n",
591-
" ax.xaxis.set_minor_locator(mdates.WeekdayLocator(byweekday=mdates.MO))\n",
592-
"\n",
593-
"axes[0].set_ylabel(\"Discharge (cubic feet per second)\")\n",
594-
"axes[2].set_ylabel(\"Discharge (cubic feet per second)\")\n",
595-
"axes[2].set_xlabel(\"\")\n",
596-
"axes[3].set_xlabel(\"\")\n",
597-
"\n",
573+
"# Remove any existing figure-level legends\n",
574+
"for leg in fig.legends:\n",
575+
" leg.remove()\n",
598576
"handles, labels = axes[-1].get_legend_handles_labels()\n",
599577
"fig.legend(handles, labels, loc=\"lower center\", ncol=3, frameon=False)\n",
600-
"fig.suptitle(f\"Missouri River sites - Daily Mean, Instantaneous, and Field Measurement Discharge\")\n",
601-
"fig.autofmt_xdate()\n"
578+
"\n",
579+
"# Redraw the figure\n",
580+
"fig.canvas.draw_idle()\n",
581+
"fig\n"
602582
]
603583
},
604584
{

0 commit comments

Comments
 (0)