D-Score Suite (v1) Benchmark#
These are custom-defined Python functions that calculate metrics comparing modeled/simulated values against observed values over time.
These statistics are adapted from the originals in thodson-usgs/dscore.
See: https://doi.org/10.1029/2021MS002681
The Metrics:#
This suite of metrics describes the content of the benchmark:
| Metric | Reference |
|---|---|
| mse | Mean Squared Error |
| bias-distribution-sequence | Hodson et al., 2021 (in review), Mean squared error, deconstructed. Journal of Advances in Modeling Earth Systems. |
| sequence | |
| Seasonal-winter | |
| Seasonal-spring | |
| Seasonal-summer | |
| Seasonal-fall | |
| mse_Q0025 ; mse_Qlow | |
| mse_Q2550 ; mse_Qbelowavg | |
| mse_Q5075 ; mse_Qaboveavg | |
| mse_Q75100 ; mse_Qhigh | |
This notebook briefly describes each of the above metrics. The code implementing each metric is included as a function definition, so this notebook can be sourced into analysis notebooks to make these functions available natively.
See D-Score Usage for an example of using these functions with a demonstration dataset.
@thodson-usgs – are these items included in DScore v1 ?
| metric | reference |
|---|---|
| bias-variance | Geman et al., 1992, Neural networks and the bias/variance dilemma. Neural Computation, 4(1), 1-58. http://dx.doi.org/10.1162/neco.1992.4.1.1 |
| score | Exponential scoring function that maps MSE to the unit interval. |
import logging
import numpy as np
import pandas as pd
Mean Squared Error#
def mse(obs, sim) -> float:
"""
Mean Square Error -- compute MSE over all paired observed (x) and simulated/modeled (x_hat) values.
.. math::
MSE = \frac{1}{n}\sum_{i=1}^{n}(x_i - \hat{x}_i)^2
:return: mean square error
:rtype: float
NOTE: this and all functions below rely upon the obs and sim datatypes implementing
certain math methods on themselves. That is, obs.sum() must be defined by
typeof(obs). Pandas Series and DataFrames do this, but other array_like
may not.
"""
e = obs - sim
return (e**2).mean()
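A minimal usage sketch follows. The synthetic obs/sim series below are hypothetical (three years of daily values, not part of the benchmark) and are reused by the usage sketches later in this notebook.
rng = np.random.default_rng(42)  # hypothetical demo data
dates = pd.date_range("2018-01-01", periods=3 * 365, freq="D")
obs = pd.Series(rng.gamma(shape=2.0, scale=10.0, size=len(dates)), index=dates)
sim = obs + rng.normal(loc=0.5, scale=2.0, size=len(dates))  # "model" = obs + bias + noise
mse(obs, sim)  # a single float: the mean of the squared errors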
Percent Bias#
def pbias(obs, sim) -> float:
"""
percent bias -- a measure of the mean tendency of simulated values to be
greater than or less than associated observed values.
Returns:
float: calculated percent bias / units = percent (i.e. 90 rather than 0.90)
"""
return 100 * (sim - obs).sum() / obs.sum()
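A small worked example with made-up numbers, plus the synthetic series from the sketch above. The result is in percent, and positive values indicate that the simulation over-predicts on average.
# Simulated total of 110 vs. observed total of 100 -> +10 percent bias.
pbias(pd.Series([10.0, 20.0, 30.0, 40.0]), pd.Series([12.0, 22.0, 33.0, 43.0]))  # 10.0
pbias(obs, sim)  # positive, since sim was built with a +0.5 offset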
Bias & Variance Decomposition#
From ‘Bias-Variance Trade-off’
def e_bias(obs, sim) -> float:
"""
Bias = square of mean error
Returns:
float: squared mean error (the bias component of the MSE)
"""
e = sim - obs
return e.mean()**2
def e_variance(obs, sim) -> float:
"""
Variance of error
Args:
obs (pd.Series - like): data representing observed values
sim (pd.Series - like): data representing simulated/modeled values
Returns:
float: variance of the error
"""
e = sim - obs
return e.var(ddof=1)
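A quick consistency sketch using the synthetic obs/sim from above. With a population variance (ddof=0) the identity MSE = bias + variance is exact; the sample variance (ddof=1) used above differs only by a factor of n/(n-1), which is negligible for long records.
e = sim - obs
mse(obs, sim)                            # mean(e**2)
e.mean()**2 + e.var(ddof=0)              # identical, by algebra
e_bias(obs, sim) + e_variance(obs, sim)  # nearly identical for a long record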
Bias-Distribution-Sequence Decomposition#
def bias_distribution_sequence(obs, sim) -> pd.Series:
"""
Decomposition into bias, distribution, sequence
Args:
obs (pd.Series - like): data representing observed values
sim (pd.Series - like): data representing simulated/modeled values
Returns:
pd.Series: three metrics, indexed by name
"""
e = sim - obs
s = np.sort(sim) - np.sort(obs)
var_s = s.var(ddof=1)
var_e = e.var(ddof=1)
e_seq = var_e - var_s
e_dist = var_s
e_bias = e.mean()**2
return pd.Series(
[e_bias, e_dist, e_seq],
index=["e_bias", "e_dist", "e_seq"]
)
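Usage sketch with the same synthetic series. Up to the ddof=1 sample-variance correction, the three components sum to the overall MSE.
parts = bias_distribution_sequence(obs, sim)
parts          # e_bias, e_dist, e_seq
parts.sum()    # approximately mse(obs, sim)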
Seasonal Decomposition#
def seasonal_mse(obs, sim):
"""
Decompose error by season.
Args:
obs (pd.Series - like): data representing observed values
sim (pd.Series - like): data representing simulated/modeled values
Both obs and sim should be time-indexed, such that we can pick out months
from the time value.
Returns:
pd.Series : mse for 4 major seasons
NOTE: 'season' is viewed from a northern-hemisphere perspective
"""
e = sim - obs
idx = (e.index.month == 12) | (e.index.month <= 2) #Dec, Jan, Feb
winter = ((e*idx)**2).mean()
idx = (e.index.month > 2) & (e.index.month <= 5) #Mar, Apr, May
spring = ((e*idx)**2).mean()
idx = (e.index.month > 5) & (e.index.month <= 8) #Jun, Jul, Aug
summer = ((e*idx)**2).mean()
idx = (e.index.month > 8) & (e.index.month <= 11) #Sep, Oct, Nov
fall = ((e*idx)**2).mean()
return pd.Series([winter, spring, summer, fall], index=['winter', 'spring', 'summer', 'fall'])
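Usage sketch with the same synthetic series (which is already time-indexed). Because every month belongs to exactly one season, the four components sum to the overall MSE.
seasonal = seasonal_mse(obs, sim)
seasonal        # one value per season (northern-hemisphere convention)
seasonal.sum()  # equals mse(obs, sim)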
Quantile Decomposition#
MSE is decomposed by quantile range:
| Q from | Q to | Label |
|---|---|---|
| 0.00 | 0.25 | Low |
| 0.25 | 0.50 | Below-Average |
| 0.50 | 0.75 | Above-Average |
| 0.75 | 1.00 | High |
def quantile_mse(obs, sim):
"""
Decomposes MSE by quantile ranges 0.00-0.25; 0.25-0.50; 0.50-0.75; 0.75-1.00
Args:
obs (pd.Series - like): series of observed values
sim (pd.Series - like): series of simulated/modeled values
Both share a common index
Returns:
pd.Series : decomposed MSE, one value per quantile range
"""
breaks=[0, 0.25, 0.5, 0.75, 1]
labels=['low', 'below_avg', 'above_avg', 'high']
e = sim - obs
scores = []
ranks = obs.rank(method='first')
quants = pd.qcut(ranks, q=breaks)
for i in range(len(breaks) - 1):
quant = e * (quants == quants.cat.categories[i]) # select quantile
mse_q = ((quant)**2).mean()
scores.append(mse_q)
return pd.Series(scores, index=labels)
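Usage sketch with the same synthetic series. As with the seasonal decomposition, the quartile bins partition the observations, so the four components sum to the overall MSE.
by_quantile = quantile_mse(obs, sim)
by_quantile        # 'low', 'below_avg', 'above_avg', 'high'
by_quantile.sum()  # equals mse(obs, sim)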
def score(e, a=1.0):
"""
Scores an error
Exponential scoring function that maps MSE to the unit interval.
Parameters
----------
e : float
Error value (e.g. an MSE) to be scored.
a : float
Positive tuning parameter.
References
----------
.. [1] Collier et al., 2018, The International Land Model Benchmarking
(ILAMB) system: Design, theory, and implementation. Journal of Advances
in Modeling Earth Systems, 10(11), http://dx.doi.org/10.1029/2018ms001354
"""
if a <= 0.0:
raise ValueError("Tuning parameter must be a positive float")
return np.exp(-1 * a * e)
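Usage sketch: a perfect simulation (zero error) scores 1, and larger errors decay toward 0. The value a=0.1 below is an arbitrary choice for illustration, not a recommended setting.
score(0.0)                   # 1.0
score(mse(obs, sim), a=0.1)  # somewhere in (0, 1); smaller for worse fits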
try:
from statsmodels.tsa.seasonal import STL
_SEASONAL = True
except ImportError:
logging.debug("STL library not available.")
_SEASONAL = False
def stl(obs, sim):
"""
Decompose error using STL.
Seasonal and trend decomposition using Loess (STL).
Note that STL is not perfectly orthogonal.
References
----------
.. [1] Cleveland et al., 1990, STL: A seasonal-trend decomposition
procedure based on loess. Journal of Official Statistics, 6(1), 3-73.
"""
if not _SEASONAL:
logging.warning("STL statistics not available.")
return None
e = sim - obs
res = STL(e, period=365, seasonal=9).fit()
E = pd.DataFrame(
{
'trend': res.trend,
'seasonality': res.seasonal,
'residual': res.resid
}
)
return (E**2).mean()
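Usage sketch (requires statsmodels). The multi-year synthetic series from above is long enough to resolve the 365-day period.
if _SEASONAL:
    stl(obs, sim)  # MSE attributed to trend, seasonality, and residual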
ScoreCard Visualizations#
These routines help with formatting scorecards…
import matplotlib.pyplot as plt
def ilamb_card_II(df, ax=None, clim=(0,100), cmap='RdYlBu'):
"""
Render a DataFrame of scores as an ILAMB-style scorecard heatmap.
Args:
df (pd.DataFrame): scores to plot; rows are components, columns are cases (e.g. sites or models)
ax (matplotlib Axes, optional): axes to draw into; defaults to plt.gca()
clim (tuple): color limits for the heatmap
cmap (str): name of the matplotlib colormap
Returns:
matplotlib Axes: the axes containing the scorecard
"""
axs = ax or plt.gca()
try:
axs.set_xlabel(df.name) # note that name is not a standard dataframe attribute.
except AttributeError: # a plain DataFrame raises AttributeError, not NameError
pass # Don't set xaxis label name
axs.set_ylabel("Component")
axs.set_yticks(np.arange(len(df.index)), labels=df.index, fontsize=6)
axs.yaxis.set_tick_params(length=0, which='minor')
axs.set_xticks(np.arange(len(df.columns)), labels=df.columns, fontsize=6, rotation=90, ha='center', va='bottom')
axs.xaxis.tick_top()
axs.xaxis.set_tick_params(length=0)
axs.xaxis.set_label_position('top')
axs.axhline(y=0.5, color='k') # between cells 0 and 1
axs.axhline(y=3.5, color='k') # between 3 and 4
axs.axhline(y=7.5, color='k') # between 7 and 8
im = axs.imshow(df, cmap=cmap, vmin=clim[0], vmax=clim[1], alpha=0.80)
cbar = axs.figure.colorbar(im, ax=axs, location='bottom', ticks=[0, 50, 100], ticklocation='bottom', pad=0.05, fraction=0.15, shrink=0.5)
cbar.ax.tick_params(labelsize=4, width=0.2, length=2, pad=1, labelbottom=True)
cbar.outline.set_linewidth(0.2)
## Annotates each cell...
txtattrs = dict(ha="center", va="center", fontsize=5)
i=0
for col in df.columns:
j=0
for row in df.index:
text = im.axes.text(i, j, df[col][row], **txtattrs)
j+=1
i+=1
## Offset minor gridlines.... paint them white.
axs.set_yticks(np.arange(len(df.index)+1)-.5, minor=True)
axs.set_xticks(np.arange(len(df.columns)+1)-.5, minor=True)
axs.grid(which="minor", color="w", linestyle='-', linewidth=0.75)
return axs
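A plotting sketch with a hypothetical scorecard: the row and column names below are made up, and the scores are assumed to be on a 0-100 scale to match the default clim.
card = pd.DataFrame(
    {"site_A": [62, 71, 55, 48], "site_B": [80, 66, 73, 59]},  # hypothetical scores
    index=["winter", "spring", "summer", "fall"],
)
fig, ax = plt.subplots(figsize=(2.5, 3), dpi=150)
ilamb_card_II(card, ax=ax)
plt.show()
Note that the horizontal separator lines are drawn at fixed row positions (y = 0.5, 3.5, 7.5), so they assume a particular ordering and number of component rows.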