WRF-Hydro Data Exploration

WRF-Hydro Data Exploration#

import os
import fsspec
import xarray as xr
import hvplot.xarray
# Set up a filesystem to access the data files
fs = fsspec.filesystem('s3', endpoint_url='https://usgs.osn.mghpcc.org/', anon=True)

# list the directories and files in the top level directory for this data release
base_url = "s3://hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/"
fs.ls(base_url)
# note: the url can be appended with additional sub-directories to drill down and explore the files
['hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/model_outputs_netcdf',
 'hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/namelist',
 'hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/restarts',
 'hytest/wrf_hydro_nhdplusv2_conus404ba_1980-2022/static_input_files']
# let's choose a particular set of files to look at
# we will make a list of all the CHANOBS - Level_Pool model outputs for one water year
file_dir = os.path.join(base_url, 'model_outputs_netcdf/CHANOBS/Level_Pool/WY2001')
nc_urls = fs.ls(file_dir)
#let's open and view the first of these files to see what it looks like
ds = xr.open_dataset(fs.open(nc_urls[0]))
ds
<xarray.Dataset> Size: 277kB
Dimensions:         (time: 1, reference_time: 1, feature_id: 8643)
Coordinates:
  * time            (time) datetime64[ns] 8B 2000-10-01
  * reference_time  (reference_time) datetime64[ns] 8B 1999-12-01
  * feature_id      (feature_id) int64 69kB 7086109 7040481 ... 15448784
    latitude        (feature_id) float32 35kB ...
    longitude       (feature_id) float32 35kB ...
Data variables:
    crs             |S1 1B ...
    order           (feature_id) int32 35kB ...
    elevation       (feature_id) float32 35kB ...
    streamflow      (time, feature_id) float64 69kB ...
Attributes: (12/19)
    TITLE:                      OUTPUT FROM WRF-Hydro v5.2.0
    compiler_version:           Intel(R) Fortran Intel(R) 64 Compiler for app...
    featureType:                timeSeries
    proj4:                      +proj=lcc +units=m +a=6370000.0 +b=6370000.0 ...
    model_initialization_time:  1999-12-01_00:00:00
    station_dimension:          feature_id
    ...                         ...
    model_configuration:        retrospective
    dev_OVRTSWCRT:              1
    dev_NOAH_TIMESTEP:          3600
    dev_channel_only:           0
    dev_channelBucket_only:     1
    dev:                        dev_ prefix indicates development/internal me...
# Now let's open several time steps of this hourly data all at once so we can plot a time series of the data
# First, choose how many files you want to open. It takes ~2 minutes for a month, ~25 minutes for a full water year
# We will open the first month of data (31 days * 24 hours = 744 files)
num_files_to_open = 744

# open up each time step's file and append it to a list of datasets
datasets = []
for url in nc_urls[:num_files_to_open]:
    ds = xr.open_dataset(fs.open(url, mode="rb"), engine="h5netcdf") 
    datasets.append(ds)
# combine all time steps' datasets into a single xarray dataset
combined_ds = xr.combine_by_coords(datasets, coords='minimal', compat='override', combine_attrs='override')
combined_ds
<xarray.Dataset> Size: 103MB
Dimensions:         (time: 744, feature_id: 8643, reference_time: 1)
Coordinates:
  * time            (time) datetime64[ns] 6kB 2000-10-01 ... 2000-10-31T23:00:00
  * reference_time  (reference_time) datetime64[ns] 8B 1999-12-01
  * feature_id      (feature_id) int64 69kB 7086109 7040481 ... 15448784
    latitude        (feature_id) float32 35kB ...
    longitude       (feature_id) float32 35kB ...
Data variables:
    crs             (time) |S1 744B b'' b'' b'' b'' b'' ... b'' b'' b'' b'' b''
    order           (time, feature_id) int32 26MB 4 2 2 2 1 2 2 ... 3 2 5 5 5 4
    elevation       (time, feature_id) float32 26MB 320.4 328.8 ... 77.56 55.88
    streamflow      (time, feature_id) float64 51MB 1.57 0.02 ... 15.64 2.26
Attributes: (12/19)
    TITLE:                      OUTPUT FROM WRF-Hydro v5.2.0
    compiler_version:           Intel(R) Fortran Intel(R) 64 Compiler for app...
    featureType:                timeSeries
    proj4:                      +proj=lcc +units=m +a=6370000.0 +b=6370000.0 ...
    model_initialization_time:  1999-12-01_00:00:00
    station_dimension:          feature_id
    ...                         ...
    model_configuration:        retrospective
    dev_OVRTSWCRT:              1
    dev_NOAH_TIMESTEP:          3600
    dev_channel_only:           0
    dev_channelBucket_only:     1
    dev:                        dev_ prefix indicates development/internal me...
# let's plot a streamflow time series at a particular location
# choose the feature_id you want to plot:
feature_id = 15448784
combined_ds.sel(feature_id=feature_id).streamflow.hvplot(x='time', grid=True)