# WRF-Hydro Data Exploration

In [None]:
import os
import fsspec
import xarray as xr
import hvplot.xarray

In [None]:
# Top-level location of this data release.
# Note: the url can be appended with additional sub-directories to drill down and explore the files.
base_url = "s3://hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/"

# Anonymous (anon=True) S3 filesystem used to access the data files at the given endpoint
fs = fsspec.filesystem(
    "s3",
    endpoint_url="https://usgs.osn.mghpcc.org/",
    anon=True,
)

# List the directories and files in the top level directory for this data release
fs.ls(base_url)

In [None]:
# Let's choose a particular set of files to look at:
# we will make a list of all the CHANOBS - Level_Pool model outputs for one water year (WY2001).
# The location is an S3 URL, not a local path, so it is joined with "/" explicitly
# instead of os.path.join, which follows the path rules of the local operating system.
file_dir = base_url.rstrip('/') + '/model_outputs_netcdf/CHANOBS/Level_Pool/WY2001'

# Sort the listing so that taking "the first N files" does not depend on the order
# in which the filesystem backend happens to return the entries.
# NOTE(review): assumes the file names sort chronologically - confirm against the listing.
nc_urls = sorted(fs.ls(file_dir))

In [None]:
# Let's open and view the first of these files to see what it looks like.
# The file is opened in binary mode and read with the h5netcdf engine, the same way the
# individual time steps are opened later on, so xarray does not have to guess the engine.
ds = xr.open_dataset(fs.open(nc_urls[0], mode="rb"), engine="h5netcdf")
ds

In [None]:
# Now let's open several time steps of this hourly data all at once so we can plot a time series of the data
# First, choose how many files you want to open. It takes ~2 minutes for a month, ~25 minutes for a full water year
# We will open the first month of data (31 days * 24 hours = 744 files)
num_files_to_open = 744

# Open each time step's file and collect the resulting datasets in a list.
# A comprehension keeps the loop variables out of the notebook namespace, so the
# dataset named `ds` from the earlier cell is not overwritten by this cell.
# Slicing past the end of nc_urls is safe: it simply yields every available file.
datasets = [
    xr.open_dataset(fs.open(url, mode="rb"), engine="h5netcdf")
    for url in nc_urls[:num_files_to_open]
]

In [None]:
# Merge the per-time-step datasets into one xarray dataset.
# The 'override' settings skip the cross-file comparison of variables and attributes
# (see the xarray combine_by_coords documentation for the exact semantics).
combined_ds = xr.combine_by_coords(
    datasets,
    coords="minimal",
    compat="override",
    combine_attrs="override",
)
combined_ds

In [None]:
# Plot a streamflow time series at a single location.
# Set feature_id to the location you want to plot:
feature_id = 15448784

# Select that feature from the combined dataset, then pull out its streamflow variable
selected_feature = combined_ds.sel(feature_id=feature_id)
selected_feature.streamflow.hvplot(x="time", grid=True)