Skip to article frontmatterSkip to article content
Site not loading correctly?

This may be due to an incorrect BASE_URL configuration. See the MyST Documentation for reference.

Streamflow Eval :: Visualization

This notebook is an example of how a HyTEST user may examine streamflow benchmark results from a hydrologic model.

Here, we are viewing daily streamflow benchmark results from the example analysis of National Water Model Retrospective version 2.1, forced with AORC, at streamflow benchmark locations (“cobalt gages” Foks et al., 2022).

Two benchmark results are examined:

Step 0: Load Required Python Libraries

try:
    import pandas as pd
    import holoviews as hv
    import hvplot.pandas
    import panel as pn
    from geoviews import tile_sources as gvts
except ImportError:
    print("A required library could not be found. ")
    raise

Step 1: Load Domain Data

Loading the dataset representing the ‘cobalt’ gages. This includes location and other metadata about each gage. We will need this extra metadata to get lat/lon and other characteristics of the gage locations.

cobalt = pd.read_csv(
    'https://www.sciencebase.gov/catalog/file/get/6181ac65d34e9f2789e44897?f=__disk__22%2F1a%2Fd2%2F221ad2fe9d95de17731ad35d0fc6b417a4b53ee1',
    dtype={'site_no':str, 'huc_cd':str, 'reachcode':str, 'comid':str, 'gagesII_class':str, 'aggecoregion': str}, 
    index_col='site_no'
    )
# Re-format the gage_id/site_no string value.  ex:   "1000000"  ==> "USGS-1000000"
cobalt.rename(index=lambda x: f'USGS-{x}', inplace=True)
cobalt.rename(columns={'dec_lat_va':'Lat', 'dec_long_va':'Lon'} , inplace=True)
print(f"{len(cobalt.index)} gages in this benchmark")
cobalt.head(2)

Step 2: Load NWM Analysis

This table is the collection of evaluation metrics comparing observed vs. modeled streamflow of the cobalt gages. This data is the result of the NWM Benchmark Analysis Notebook.

NWM = pd.read_csv(r'./NWM_v2.1_streamflow_example.csv', dtype={'site_no':str} ).set_index('site_no', drop=False)
# Merge benchmarks with cobalt data to form a single table, indexed by site_no
metrics = NWM.columns.tolist()[1:] #list of columns, EXCEPT the first column (site_no)
NWM = NWM.merge(
    cobalt, # Table to merge with NWM
    how='left',            # left join preserves only records which have an index in NWM dataframe.
    left_index=True, 
    right_index=True
    )
# The effect of the left join is that if a cobalt gage does not have a computed benchmark in NWM, 
# (e.g. due to error in the analysis process), it is dropped from the visualization set.

Step 3: Visualize NWM Benchmark Results

A quick look at the joined data table shows that we have a collection of metrics, and the associated metadata, all indexed by the site_id. This table is sortable by column -- you can scroll and select to explore the raw result.

NWM.hvplot.table(
    columns=['site_no', 'drain_sqkm','KGE', 'NSE','logNSE','pearson','spearman','rSD','pbias','pBiasFMS','pBiasFHV','pBiasFLV','complete_yrs','n_days'], 
    sortable=True, 
    selectable=True
)

Step 3a: Benchmark Results Mapped Over the Spatial Extent of the Conterminous United States

var_select = pn.widgets.Select(name='Metric', options=metrics, value='pearson')
base_map_select = pn.widgets.Select(name='Basemap:', 
                                    options=list(gvts.tile_sources.keys()), 
                                    value='OSM')

@pn.depends(var=var_select, base_map=base_map_select)
def plot(var, base_map):
    return NWM.hvplot.points(x='Lon', y='Lat', color=var, cmap='turbo_r', geo=True, tiles=base_map)

col = pn.Column(var_select, base_map_select, plot)
col.servable('Hydro Assessment Tool')

Step 3b: Boxplots

Grouped by

  • Gages-II classification,

  • HUC02 group, or

  • Aggregated Ecoregion

# define which columns are metrics in the widget and which ones are groups
var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='pearson')

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion')

@pn.depends(var_select, group_select)
def plot(var, group):
        # local scope func to calc summary stats using var/group
        # build tooltip using the result. 
        # pandas will compute with groupby(). 
    return NWM.hvplot.box(y = var, by = group, height=400, width=800, legend=False)

col = pn.Column(var_select, group_select, plot)
col.servable('NWM Benchmark Box Plots')

## TODO: 
## Hover over box to tell user exactly the number of samples in group (count), 
# median, mean, max, min, and IQR.  # hovertool -- tooltip

Step 3c: Histograms

Grouped by

  • Gages-II classification,

  • HUC02 group, or

  • Aggregated Ecoregion

var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='pearson'
    )

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion'
    )

@pn.depends(var_select, group_select)
def plot(var, group):
    return NWM.hvplot.hist(var, group, subplots=True, width=400, bins = 500, legend='top')

col = pn.Column(var_select, group_select, plot)
col.servable('NWM Benchmark Histograms')
# TODO: Constrain to fixed width for rendering.

Step 3d: Metric by Latitude & Longitude

var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='pearson')

group_select = pn.widgets.Select(name='Group By:', 
                                    options=['Lon', 'Lat'], 
                                    value='Lon')

@pn.depends(var_select, group_select)
def plot(var, group):
    return NWM.hvplot.scatter(x=group, y=var, height=400, width = 500, legend='top', hover_cols=["site_no","Lat","Lon"])

col = pn.Column(var_select, group_select, plot)
col.servable('Lat/Lon Scatter Plot')

Step 3e: Metric v. Metric

var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='pearson')

var2_select = pn.widgets.Select(name='Metric:', 
                                    options=metrics, 
                                    value='spearman')

@pn.depends(var_select, var2_select)

def plot(var, var2):
    return NWM.hvplot.scatter(x = var, y = var2, height=400, width = 500, legend='top', hover_cols=['site_no','Lat', 'Lon'])

col = pn.Column(var_select, var2_select, plot)
col.servable('Metric v Metric Scatter Plot')

Step 4: Visualize D-Score Analysis Results

The above steps are repeated for the DScore analysis.

#Data is loaded 
DScore = pd.read_csv(r'./DScore_streamflow_example.csv', dtype={'site_no':str} ).set_index('site_no', drop=False)
# Merge benchmarks with cobalt data to form a single table, indexed by site_no
metrics = DScore.columns.tolist()[1:] 
DScore = DScore.merge(
    cobalt, 
    how='left',
    left_index=True, 
    right_index=True
    )
# Examine the data table
cols = ['site_no']
cols.extend(metrics)
DScore.hvplot.table(
    columns=cols, 
    sortable=True, 
    selectable=True
)
# Map
var_select = pn.widgets.Select(name='Metric', options=metrics, value='mse')
base_map_select = pn.widgets.Select(name='Basemap:', 
                                    options=list(gvts.tile_sources.keys()), 
                                    value='OSM')

@pn.depends(var=var_select, base_map=base_map_select)
def plot(var, base_map):
    return DScore.hvplot.points(x='Lon', y='Lat', color=var, cmap='turbo_r', geo=True, tiles=base_map)

col = pn.Column(var_select, base_map_select, plot)
col.servable('Hydro Assessment Tool')
# Box Plots
var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='mse')

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion')

@pn.depends(var_select, group_select)
def plot(var, group):
    return DScore.hvplot.box(y = var, by = group, height=400, width=800, legend=False)

col = pn.Column(var_select, group_select, plot)
col.servable('DScore Benchmark Box Plots')
# Histograms
var_select = pn.widgets.Select(name='Metric', 
        options=metrics, 
        value='mse'
    )

group_select = pn.widgets.Select(name='Group By:', 
        options=['huc02', 'gagesII_class', 'aggecoregion'], 
        value='aggecoregion'
    )

@pn.depends(var_select, group_select)
def plot(var, group):
    return DScore.hvplot.hist(var, group, subplots=True, width=400, bins = 500, legend='top')

col = pn.Column(var_select, group_select, plot)
col.servable('DScore Benchmark Histograms')
# metric v lat/lon scatter plot
var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='mse')

group_select = pn.widgets.Select(name='Group By:', 
                                    options=['Lon', 'Lat'], 
                                    value='Lon')

@pn.depends(var_select, group_select)
def plot(var, group):
    return DScore.hvplot.scatter(x=group, y=var, height=400, width = 500, legend='top', hover_cols=["site_no","Lat","Lon"])

col = pn.Column(var_select, group_select, plot)
col.servable('Lat/Lon Scatter Plot')
# metric vs metric
var_select = pn.widgets.Select(name='Metric', options=metrics, 
                               value='mse')

var2_select = pn.widgets.Select(name='Metric:', 
                                    options=metrics, 
                                    value='high')

@pn.depends(var_select, var2_select)

def plot(var, var2):
    return DScore.hvplot.scatter(x = var, y = var2, height=400, width = 500, legend='top', hover_cols=['site_no','Lat', 'Lon'])

col = pn.Column(var_select, var2_select, plot)
col.servable('Metric v Metric Scatter Plot')