Streamflow Eval :: Visualization#
This notebook is an example of how a HyTEST user may examine streamflow benchmark results from a hydrologic model.
Here, we are viewing daily streamflow benchmark results from the example analysis of National Water Model Retrospective version 2.1, forced with AORC, at streamflow benchmark locations (“cobalt gages” Foks et al., 2022).
Two benchmark results are examined:
the standard statistical suite results (Towler et al., 2022)
decomposition statistical suite, d-score (Hodson et al., 2022).
Step 0: Load Required Python Libraries#
try:
import pandas as pd
import holoviews as hv
import hvplot.pandas
import panel as pn
from geoviews import tile_sources as gvts
except ImportError:
print("A required library could not be found. ")
raise
Step 1: Load Domain Data#
Loading the dataset representing the ‘cobalt’ gages. This includes location and other metadata about each gage. We will need this extra metadata to get lat/lon and other characteristics of the gage locations.
cobalt = pd.read_csv(
'https://www.sciencebase.gov/catalog/file/get/6181ac65d34e9f2789e44897?f=__disk__22%2F1a%2Fd2%2F221ad2fe9d95de17731ad35d0fc6b417a4b53ee1',
dtype={'site_no':str, 'huc_cd':str, 'reachcode':str, 'comid':str, 'gagesII_class':str, 'aggecoregion': str},
index_col='site_no'
)
# Re-format the gage_id/site_no string value. ex: "1000000" ==> "USGS-1000000"
cobalt.rename(index=lambda x: f'USGS-{x}', inplace=True)
cobalt.rename(columns={'dec_lat_va':'Lat', 'dec_long_va':'Lon'} , inplace=True)
print(f"{len(cobalt.index)} gages in this benchmark")
cobalt.head(2)
5390 gages in this benchmark
Lat | Lon | comid | reachcode | reach_meas | drain_sqkm | huc02 | gagesII_class | aggecoregion | complete_yrs | n_days | nldi | swim | gfv1d1 | camels | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
site_no | |||||||||||||||
USGS-01011000 | 47.069722 | -69.079444 | 721640 | 01010002000001 | 53.747189 | 3186.8 | 01 | Non-ref | NorthEast | 33 | 12146 | 1 | 1 | 1 | 0 |
USGS-01013500 | 47.237500 | -68.582778 | 724696 | 01010003000003 | 7.660419 | 2252.7 | 01 | Ref | NorthEast | 33 | 12146 | 1 | 1 | 1 | 1 |
Step 2: Load NWM Analysis#
This table is the collection of evaluation metrics comparing observed vs. modeled streamflow of the cobalt gages. This data is the result of the NWM Benchmark Analysis Notebook.
NWM = pd.read_csv(r'./NWM_v2.1_streamflow_example.csv', dtype={'site_no':str} ).set_index('site_no', drop=False)
# Merge benchmarks with cobalt data to form a single table, indexed by site_no
metrics = NWM.columns.tolist()[1:] #list of columns, EXCEPT the first column (site_no)
NWM = NWM.merge(
cobalt, # Table to merge with NWM
how='left', # left join preserves only records which have an index in NWM dataframe.
left_index=True,
right_index=True
)
# The effect of the left join is that if a cobalt gage does not have a computed benchmark in NWM,
# (e.g. due to error in the analysis process), it is dropped from the visualization set.
Step 3: Visualize NWM Benchmark Results#
A quick look at the joined data table shows that we have a collection of metrics, and the associated metadata,
all indexed by the site_id
. This table is sortable by column – you can scroll and select to explore the
raw result.
NWM.hvplot.table(
columns=['site_no', 'drain_sqkm','KGE', 'NSE','logNSE','pearson','spearman','rSD','pbias','pBiasFMS','pBiasFHV','pBiasFLV','complete_yrs','n_days'],
sortable=True,
selectable=True
)
Step 3a: Benchmark Results Mapped Over the Spatial Extent of the Conterminous United States#
var_select = pn.widgets.Select(name='Metric', options=metrics, value='pearson')
base_map_select = pn.widgets.Select(name='Basemap:',
options=list(gvts.tile_sources.keys()),
value='OSM')
@pn.depends(var=var_select, base_map=base_map_select)
def plot(var, base_map):
return NWM.hvplot.points(x='Lon', y='Lat', color=var, cmap='turbo_r', geo=True, tiles=base_map)
col = pn.Column(var_select, base_map_select, plot)
col.servable('Hydro Assessment Tool')
Step 3b: Boxplots#
Grouped by
Gages-II classification,
HUC02 group, or
Aggregated Ecoregion
# define which columns are metrics in the widget and which ones are groups
var_select = pn.widgets.Select(name='Metric',
options=metrics,
value='pearson')
group_select = pn.widgets.Select(name='Group By:',
options=['huc02', 'gagesII_class', 'aggecoregion'],
value='aggecoregion')
@pn.depends(var_select, group_select)
def plot(var, group):
# local scope func to calc summary stats using var/group
# build tooltip using the result.
# pandas will compute with groupby().
return NWM.hvplot.box(y = var, by = group, height=400, width=800, legend=False)
col = pn.Column(var_select, group_select, plot)
col.servable('NWM Benchmark Box Plots')
## TODO:
## Hover over box to tell user exactly the number of samples in group (count),
# median, mean, max, min, and IQR. # hovertool -- tooltip
Step 3c: Histograms#
Grouped by
Gages-II classification,
HUC02 group, or
Aggregated Ecoregion
var_select = pn.widgets.Select(name='Metric',
options=metrics,
value='pearson'
)
group_select = pn.widgets.Select(name='Group By:',
options=['huc02', 'gagesII_class', 'aggecoregion'],
value='aggecoregion'
)
@pn.depends(var_select, group_select)
def plot(var, group):
return NWM.hvplot.hist(var, group, subplots=True, width=400, bins = 500, legend='top')
col = pn.Column(var_select, group_select, plot)
col.servable('NWM Benchmark Histograms')
# TODO: Constrain to fixed width for rendering.
Step 3d: Metric by Latitude & Longitude#
var_select = pn.widgets.Select(name='Metric', options=metrics,
value='pearson')
group_select = pn.widgets.Select(name='Group By:',
options=['Lon', 'Lat'],
value='Lon')
@pn.depends(var_select, group_select)
def plot(var, group):
return NWM.hvplot.scatter(x=group, y=var, height=400, width = 500, legend='top', hover_cols=["site_no","Lat","Lon"])
col = pn.Column(var_select, group_select, plot)
col.servable('Lat/Lon Scatter Plot')
Step 3e: Metric v. Metric#
var_select = pn.widgets.Select(name='Metric', options=metrics,
value='pearson')
var2_select = pn.widgets.Select(name='Metric:',
options=metrics,
value='spearman')
@pn.depends(var_select, var2_select)
def plot(var, var2):
return NWM.hvplot.scatter(x = var, y = var2, height=400, width = 500, legend='top', hover_cols=['site_no','Lat', 'Lon'])
col = pn.Column(var_select, var2_select, plot)
col.servable('Metric v Metric Scatter Plot')
Step 4: Visualize D-Score Analysis Results#
The above steps are repeated for the DScore analysis.
#Data is loaded
DScore = pd.read_csv(r'./DScore_streamflow_example.csv', dtype={'site_no':str} ).set_index('site_no', drop=False)
# Merge benchmarks with cobalt data to form a single table, indexed by site_no
metrics = DScore.columns.tolist()[1:]
DScore = DScore.merge(
cobalt,
how='left',
left_index=True,
right_index=True
)
# Examine the data table
cols = ['site_no']
cols.extend(metrics)
DScore.hvplot.table(
columns=cols,
sortable=True,
selectable=True
)
# Map
var_select = pn.widgets.Select(name='Metric', options=metrics, value='mse')
base_map_select = pn.widgets.Select(name='Basemap:',
options=list(gvts.tile_sources.keys()),
value='OSM')
@pn.depends(var=var_select, base_map=base_map_select)
def plot(var, base_map):
return DScore.hvplot.points(x='Lon', y='Lat', color=var, cmap='turbo_r', geo=True, tiles=base_map)
col = pn.Column(var_select, base_map_select, plot)
col.servable('Hydro Assessment Tool')
# Box Plots
var_select = pn.widgets.Select(name='Metric',
options=metrics,
value='mse')
group_select = pn.widgets.Select(name='Group By:',
options=['huc02', 'gagesII_class', 'aggecoregion'],
value='aggecoregion')
@pn.depends(var_select, group_select)
def plot(var, group):
return DScore.hvplot.box(y = var, by = group, height=400, width=800, legend=False)
col = pn.Column(var_select, group_select, plot)
col.servable('DScore Benchmark Box Plots')
# Histograms
var_select = pn.widgets.Select(name='Metric',
options=metrics,
value='mse'
)
group_select = pn.widgets.Select(name='Group By:',
options=['huc02', 'gagesII_class', 'aggecoregion'],
value='aggecoregion'
)
@pn.depends(var_select, group_select)
def plot(var, group):
return DScore.hvplot.hist(var, group, subplots=True, width=400, bins = 500, legend='top')
col = pn.Column(var_select, group_select, plot)
col.servable('DScore Benchmark Histograms')
# metric v lat/lon scatter plot
var_select = pn.widgets.Select(name='Metric', options=metrics,
value='mse')
group_select = pn.widgets.Select(name='Group By:',
options=['Lon', 'Lat'],
value='Lon')
@pn.depends(var_select, group_select)
def plot(var, group):
return DScore.hvplot.scatter(x=group, y=var, height=400, width = 500, legend='top', hover_cols=["site_no","Lat","Lon"])
col = pn.Column(var_select, group_select, plot)
col.servable('Lat/Lon Scatter Plot')
# metric vs metric
var_select = pn.widgets.Select(name='Metric', options=metrics,
value='mse')
var2_select = pn.widgets.Select(name='Metric:',
options=metrics,
value='high')
@pn.depends(var_select, var2_select)
def plot(var, var2):
return DScore.hvplot.scatter(x = var, y = var2, height=400, width = 500, legend='top', hover_cols=['site_no','Lat', 'Lon'])
col = pn.Column(var_select, var2_select, plot)
col.servable('Metric v Metric Scatter Plot')