Streamflow Eval :: Visualization#

This notebook is an example of how a HyTEST user may examine streamflow benchmark results from a hydrologic model.

Here, we are viewing daily streamflow benchmark results from the example analysis of National Water Model Retrospective version 2.1, forced with AORC, at streamflow benchmark locations (“cobalt gages” Foks et al., 2022).

Two benchmark results are examined:

Step 0: Load Required Python Libraries#

try:
    import pandas as pd
    import holoviews as hv
    import hvplot.pandas
    import panel as pn
    from geoviews import tile_sources as gvts
except ImportError:
    print("A required library could not be found. ")
    raise

Step 1: Load Domain Data#

Loading the dataset representing the ‘cobalt’ gages. This includes location and other metadata about each gage. We will need this extra metadata to get lat/lon and other characteristics of the gage locations.

cobalt = pd.read_csv(
    'https://www.sciencebase.gov/catalog/file/get/6181ac65d34e9f2789e44897?f=__disk__22%2F1a%2Fd2%2F221ad2fe9d95de17731ad35d0fc6b417a4b53ee1',
    dtype={'site_no':str, 'huc_cd':str, 'reachcode':str, 'comid':str, 'gagesII_class':str, 'aggecoregion': str}, 
    index_col='site_no'
    )
# Re-format the gage_id/site_no string value.  ex:   "1000000"  ==> "USGS-1000000"
cobalt.rename(index=lambda x: f'USGS-{x}', inplace=True)
cobalt.rename(columns={'dec_lat_va':'Lat', 'dec_long_va':'Lon'} , inplace=True)
print(f"{len(cobalt.index)} gages in this benchmark")
cobalt.head(2)
5390 gages in this benchmark
Lat Lon comid reachcode reach_meas drain_sqkm huc02 gagesII_class aggecoregion complete_yrs n_days nldi swim gfv1d1 camels
site_no
USGS-01011000 47.069722 -69.079444 721640 01010002000001 53.747189 3186.8 01 Non-ref NorthEast 33 12146 1 1 1 0
USGS-01013500 47.237500 -68.582778 724696 01010003000003 7.660419 2252.7 01 Ref NorthEast 33 12146 1 1 1 1

Step 2: Load NWM Analysis#

This table is the collection of evaluation metrics comparing observed vs. modeled streamflow of the cobalt gages. This data is the result of the NWM Benchmark Analysis Notebook.

NWM = pd.read_csv(r'./NWM_v2.1_streamflow_example.csv', dtype={'site_no':str} ).set_index('site_no', drop=False)
# Merge benchmarks with cobalt data to form a single table, indexed by site_no
metrics = NWM.columns.tolist()[1:] #list of columns, EXCEPT the first column (site_no)
NWM = NWM.merge(
    cobalt, # Table to merge with NWM
    how='left',            # left join preserves only records which have an index in NWM dataframe.
    left_index=True, 
    right_index=True
    )
# The effect of the left join is that if a cobalt gage does not have a computed benchmark in NWM, 
# (e.g. due to error in the analysis process), it is dropped from the visualization set.

Step 3: Visualize NWM Benchmark Results#

A quick look at the joined data table shows that we have a collection of metrics, and the associated metadata, all indexed by the site_id. This table is sortable by column – you can scroll and select to explore the raw result.

NWM.hvplot.table(
    columns=['site_no', 'drain_sqkm','KGE', 'NSE','logNSE','pearson','spearman','rSD','pbias','pBiasFMS','pBiasFHV','pBiasFLV','complete_yrs','n_days'], 
    sortable=True, 
    selectable=True
)

Step 3a: Benchmark Results Mapped Over the Spatial Extent of the Conterminous United States#

var_select = pn.widgets.Select(name='Metric', options=metrics, value='pearson')
base_map_select = pn.widgets.Select(name='Basemap:', 
                                    options=list(gvts.tile_sources.keys()), 
                                    value='OSM')

@pn.depends(var=var_select, base_map=base_map_select)
def plot(var, base_map):
    return NWM.hvplot.points(x='Lon', y='Lat', color=var, cmap='turbo_r', geo=True, tiles=base_map)

col = pn.Column(var_select, base_map_select, plot)
col.servable('Hydro Assessment Tool')

Step 3b: Boxplots#

Grouped by

  • Gages-II classification,

  • HUC02 group, or

  • Aggregated Ecoregion

# define which columns are metrics in the widget and which ones are groups
var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='pearson')

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion')

@pn.depends(var_select, group_select)
def plot(var, group):
        # local scope func to calc summary stats using var/group
        # build tooltip using the result. 
        # pandas will compute with groupby(). 
    return NWM.hvplot.box(y = var, by = group, height=400, width=800, legend=False)

col = pn.Column(var_select, group_select, plot)
col.servable('NWM Benchmark Box Plots')

## TODO: 
## Hover over box to tell user exactly the number of samples in group (count), 
# median, mean, max, min, and IQR.  # hovertool -- tooltip

Step 3c: Histograms#

Grouped by

  • Gages-II classification,

  • HUC02 group, or

  • Aggregated Ecoregion

var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='pearson'
    )

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion'
    )

@pn.depends(var_select, group_select)
def plot(var, group):
    return NWM.hvplot.hist(var, group, subplots=True, width=400, bins = 500, legend='top')

col = pn.Column(var_select, group_select, plot)
col.servable('NWM Benchmark Histograms')
# TODO: Constrain to fixed width for rendering.

Step 3d: Metric by Latitude & Longitude#

var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='pearson')

group_select = pn.widgets.Select(name='Group By:', 
                                    options=['Lon', 'Lat'], 
                                    value='Lon')

@pn.depends(var_select, group_select)
def plot(var, group):
    return NWM.hvplot.scatter(x=group, y=var, height=400, width = 500, legend='top', hover_cols=["site_no","Lat","Lon"])

col = pn.Column(var_select, group_select, plot)
col.servable('Lat/Lon Scatter Plot')

Step 3e: Metric v. Metric#

var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='pearson')

var2_select = pn.widgets.Select(name='Metric:', 
                                    options=metrics, 
                                    value='spearman')

@pn.depends(var_select, var2_select)

def plot(var, var2):
    return NWM.hvplot.scatter(x = var, y = var2, height=400, width = 500, legend='top', hover_cols=['site_no','Lat', 'Lon'])

col = pn.Column(var_select, var2_select, plot)
col.servable('Metric v Metric Scatter Plot')

Step 4: Visualize D-Score Analysis Results#

The above steps are repeated for the DScore analysis.

#Data is loaded 
DScore = pd.read_csv(r'./DScore_streamflow_example.csv', dtype={'site_no':str} ).set_index('site_no', drop=False)
# Merge benchmarks with cobalt data to form a single table, indexed by site_no
metrics = DScore.columns.tolist()[1:] 
DScore = DScore.merge(
    cobalt, 
    how='left',
    left_index=True, 
    right_index=True
    )
# Examine the data table
cols = ['site_no']
cols.extend(metrics)
DScore.hvplot.table(
    columns=cols, 
    sortable=True, 
    selectable=True
)
# Map
var_select = pn.widgets.Select(name='Metric', options=metrics, value='mse')
base_map_select = pn.widgets.Select(name='Basemap:', 
                                    options=list(gvts.tile_sources.keys()), 
                                    value='OSM')

@pn.depends(var=var_select, base_map=base_map_select)
def plot(var, base_map):
    return DScore.hvplot.points(x='Lon', y='Lat', color=var, cmap='turbo_r', geo=True, tiles=base_map)

col = pn.Column(var_select, base_map_select, plot)
col.servable('Hydro Assessment Tool')
# Box Plots
var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='mse')

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion')

@pn.depends(var_select, group_select)
def plot(var, group):
    return DScore.hvplot.box(y = var, by = group, height=400, width=800, legend=False)

col = pn.Column(var_select, group_select, plot)
col.servable('DScore Benchmark Box Plots')
# Histograms
var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='mse'
    )

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion'
    )

@pn.depends(var_select, group_select)
def plot(var, group):
    return DScore.hvplot.hist(var, group, subplots=True, width=400, bins = 500, legend='top')

col = pn.Column(var_select, group_select, plot)
col.servable('DScore Benchmark Histograms')
# metric v lat/lon scatter plot
var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='mse')

group_select = pn.widgets.Select(name='Group By:', 
                                    options=['Lon', 'Lat'], 
                                    value='Lon')

@pn.depends(var_select, group_select)
def plot(var, group):
    return DScore.hvplot.scatter(x=group, y=var, height=400, width = 500, legend='top', hover_cols=["site_no","Lat","Lon"])

col = pn.Column(var_select, group_select, plot)
col.servable('Lat/Lon Scatter Plot')
# metric vs metric
var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='mse')

var2_select = pn.widgets.Select(name='Metric:', 
                                    options=metrics, 
                                    value='high')

@pn.depends(var_select, var2_select)

def plot(var, var2):
    return DScore.hvplot.scatter(x = var, y = var2, height=400, width = 500, legend='top', hover_cols=['site_no','Lat', 'Lon'])

col = pn.Column(var_select, var2_select, plot)
col.servable('Metric v Metric Scatter Plot')