WRF-Hydro Data Exploration

WRF-Hydro Data Exploration

import os
import fsspec
import xarray as xr
import hvplot.xarray

# Connection options for the S3 filesystem: custom endpoint, anonymous access
s3_options = {
    'endpoint_url': 'https://usgs.osn.mghpcc.org/',
    'anon': True,
}
fs = fsspec.filesystem('s3', **s3_options)

# Top-level directory of this data release.
# Note: additional sub-directories can be appended to this URL to drill down
# and explore the files.
base_url = "s3://hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/"

# List the directories and files found directly under the top-level directory
fs.ls(base_url)

['hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/model_outputs_netcdf',
 'hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/namelist',
 'hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/restarts',
 'hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/static_input_files']

# Pick one particular set of files to look at: the CHANOBS - Level_Pool
# model outputs for a single water year (WY2001)
wy2001_subdir = 'model_outputs_netcdf/CHANOBS/Level_Pool/WY2001'
file_dir = os.path.join(base_url, wy2001_subdir)

# Listing of everything under that directory
nc_urls = fs.ls(file_dir)

# Now let's open several time steps of this hourly data all at once so we can plot a time series of the data
# First, choose how many files you want to open. It takes ~2 minutes for a month, ~25 minutes for a full water year
# We will open the first month of data (31 days * 24 hours = 744 files)
num_files_to_open = 744

# open up each time step's file and append it to a list of datasets
# (the file object returned by fs.open is handed straight to xarray and is not
# closed here, so the dataset can keep reading from it)
datasets = []
for url in nc_urls[:num_files_to_open]:
    ds = xr.open_dataset(fs.open(url, mode="rb"), engine="h5netcdf")
    datasets.append(ds)

# combine the individual time steps into a single dataset along the time
# dimension; this defines `combined_ds`, which the plotting step relies on
# NOTE(review): assumes every file shares the same variables and differs only
# in its time stamp - confirm against the file contents
combined_ds = xr.concat(datasets, dim="time")

# Plot a streamflow time series at a single location.
# Set feature_id to the feature you want to plot:
feature_id = 15448784

# Narrow the combined dataset down to that feature, then plot its streamflow
# against time with grid lines enabled
selected_feature = combined_ds.sel(feature_id=feature_id)
selected_feature.streamflow.hvplot(x='time', grid=True)