Streamflow - exercises
Import relevant packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set(style="ticks", font_scale=1.5)
from ipywidgets import *
# these will be useful when printing superscripts: m^2, m^3
squared = "\u00b2"
cubed = "\u00b3"
Import streamflow data from USGS's National Water Information System. We will be using data from Urbana, IL. The time resolution varies, it can be 15 minutes, it can be 5 minutes!
# Drainage area: 4.78 square miles
data_file = "USGS 03337100 BONEYARD CREEK AT LINCOLN AVE AT URBANA, IL.dat"
df_q_2020 = pd.read_csv(data_file,
header=31, # no headers needed, we'll do that later
delim_whitespace=True, # blank spaces separate between columns
na_values=["Bkw"] # substitute these values for missing (NaN) values
)
df_q_2020.columns = ['agency_cd', 'site_no','datetime','tz_cd','EDT','discharge','code'] # rename df columns with headers columns
df_q_2020['date_and_time'] = df_q_2020['datetime'] + ' ' + df_q_2020['tz_cd'] # combine date+time into datetime
df_q_2020['date_and_time'] = pd.to_datetime(df_q_2020['date_and_time']) # interpret datetime
df_q_2020 = df_q_2020.set_index('date_and_time') # make datetime the index
df_q_2020['discharge'] = df_q_2020['discharge'].astype(float)
df_q_2020['discharge'] = df_q_2020['discharge'] * 0.0283168 # convert cubic feet per second to m3/s
fig, ax = plt.subplots(figsize=(10,7))
ax.plot(df_q_2020['discharge'], '-o')
plt.gcf().autofmt_xdate()
ax.set(xlabel="date",
ylabel=r"discharge (m$^3$/5min)");
Import sub-hourly (5-min) rainfall data from NOAA's Climate Reference Network Data website.
data_file = "Champaign - IL.txt"
df_p_2020 = pd.read_csv(data_file,
header=None, # no headers needed, we'll do that later
delim_whitespace=True, # blank spaces separate between columns
na_values=["-99.000", "-9999.0"] # substitute these values for missing (NaN) values
)
headers = pd.read_csv("HEADERS_sub_hourly.txt", # load headers file
header=1, # skip the first [0] line
delim_whitespace=True
)
df_p_2020.columns = headers.columns # rename df columns with headers columns
# LST = local standard time
df_p_2020["LST_TIME"] = [f"{x:04d}" for x in df_p_2020["LST_TIME"]] # time needs padding of zeros, then convert to string
df_p_2020['LST_DATE'] = df_p_2020['LST_DATE'].astype(str) # convert date into string
df_p_2020['datetime'] = df_p_2020['LST_DATE'] + ' ' + df_p_2020['LST_TIME'] # combine date+time into datetime
df_p_2020['datetime'] = pd.to_datetime(df_p_2020['datetime']) # interpret datetime
df_p_2020 = df_p_2020.set_index('datetime') # make datetime the index
Plot rainfall and streamflow. Does this makes sense?
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10,7))
fig.subplots_adjust(hspace=0.05)
start = "2020-10-18"
end = "2020-10-25"
ax1.plot(df_p_2020[start:end]['PRECIPITATION'])
ax2.plot(df_q_2020[start:end]['discharge'], color="tab:blue", lw=2)
ax1.set(xticks=[],
ylabel=r"precipitation (mm)")
ax2.set(xlabel="date",
ylabel=r"discharge (m$^3$/s)")
plt.gcf().autofmt_xdate() # makes slated dates
Define smaller dataframes for $p(t)$ and $q(t)$, between the dates:
start = "2020-10-20 14:00:00"
end = "2020-10-21 04:00:00"
Don't forget to convert the units to SI!
Calculate total rainfall $P$ and total discharge $Q$, in m$^3$.
# Drainage area: 4.78 square miles
area = 4.78 / 0.00000038610 # squared miles to squared meters
start = "2020-10-20 14:00:00"
end = "2020-10-21 04:00:00"
# time resolution for p is 5 minutes, for q is 15 minutes
delta_t_precipitation = 5 * 60 # in seconds
delta_t_discharge = 5 * 60 # in seconds
df_p = df_p_2020.loc[start:end, 'PRECIPITATION'].to_frame()
df_q = df_q_2020.loc[start:end]['discharge'].to_frame()
# convert mm to m3.
# a) it is understood that 1 mm = 1 L/m2 = 1e-3 m3/m2.
# b) divide by 1000 to convert mm to m3/m2
# c) multiply by area (m2) to obtain total volume (m3) in whole watershed
df_p['precipitation'] = df_p['PRECIPITATION'].values * area / 1000 # mm to m3 in the whole watershed
# precipitation depth was originally given in mm in 5-min windows
# divide by delta_t_precipitation = 5*60 to obtain finally m3/s
df_p['precipitation'] = df_p['precipitation'] / delta_t_precipitation
# Total volumes (m3) = sum of all bars times bar width (delta_t)
P = df_p['precipitation'].sum() * delta_t_precipitation
Q = df_q['discharge'].sum() * delta_t_discharge
print(f"total precipitation during event:\nP = {P:.1e} m{cubed}")
print(f"total streamflow during event:\nQ = {Q:.1e} m{cubed}")
print(f"Streamflow is {Q/P:.0%} of precipitation")
Make another graph of $p(t)$ and $q(t)$, now with SI units.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10,7))
fig.subplots_adjust(hspace=0.05)
start = "2020-10-18"
end = "2020-10-25"
ax1.plot(df_p['precipitation'])
ax2.plot(df_q['discharge'], color="tab:blue", lw=2)
ax1.set(xticks=[],
ylabel=r"precipitation (m$^3$/s)",
title="Precipitation and discharge, Boneyard Creek at Urbana, IL\n 20-21 October 2020, 5-minute data")
ax2.set(xlabel="date",
ylabel=r"discharge (m$^3$/s)")
plt.gcf().autofmt_xdate() # makes slated dates
It's time for base flow separation! Convert q(t) into q*(t)
from matplotlib.dates import HourLocator, DateFormatter
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10,7))
fig.subplots_adjust(wspace=0.05)
ax1.plot(df_q['discharge'], color="black", lw=2)
# choose 2 by trial and error
point1 = pd.to_datetime("2020-10-20 16:40:00")
point2 = pd.to_datetime("2020-10-21 00:00:00")
two_points = df_q.loc[[point1, point2], 'discharge']
ax1.plot(two_points, 'o', color="tab:red")
# make new dataframe with the two chosen points
df_linear = pd.DataFrame(data=two_points, index=two_points.index)
# produce more points every 5 minutes by linear interpolation
df_linear = (df_linear.resample("5min") # resample
.interpolate(method='time') # interpolate by time (linear)
)
ax1.plot(df_linear, color="tab:blue")
# shade blue the region between the two curves
df_between_2_points = df_q.loc[df_linear.index]
ax1.fill_between(df_between_2_points.index, df_between_2_points['discharge'],
y2=df_linear['discharge'],
color="tab:blue", alpha=0.3)
# calculate qstar = q - q_baseflow
qstar = df_q.loc[df_linear.index, 'discharge'] - df_linear['discharge']
Qstar = qstar.sum() * delta_t_discharge
ax2.plot(qstar, color="black", lw=2)
ax2.fill_between(qstar.index, qstar,
y2=0.0,
color="tab:blue", alpha=0.3)
ax1.set(xlim=[df_q.index[0],
df_q.index[-1]],
ylabel=r"discharge (m$^3$/s)",
ylim=[0, 5.5],
yticks=[0,1,2,3,4],
title="total discharge, q(t)")
ax2.set(yticks=[],
ylim=[0, 5.5],
xlim=[df_q.index[0],
df_q.index[-1]],
title="effective discharge, q*(t)"
)
plt.gcf().autofmt_xdate() # makes slated dates
We can calculate p* now, using
$$ P^* = Q^* $$One of the simplest methods is to multiply $p(t)$ by a fixed constant (<1) to obtain $p^*$, so that the equation above holds true.
ratio = Qstar/ P
print(f"Qstar / P = {ratio:.2f}")
print(f"This means that {Qstar/P:.0%} of total precipitation became stormflow")
print(f"We can find Pstar by calculating Pstar = {ratio:.2f} * P")
pstar = df_p['precipitation'] * ratio # m3/s
Pstar = pstar.sum() * delta_t_precipitation # m3
Calculate now the centroid ($t_pc$) for effective precipitation p and centroid ($t_{qc}$) of effective discharge q. Calculate also the time of peak discharge ($t_{pk}$). Then, calculate the centroid lag ($T_{LC}$), the centroid lag-to-peak ($T_{LPC}$), and the time of concentration ($T_c$). Use the equations below:
$T_{LPC} \simeq 0.60 \cdot T_c$
Time of precipitation centroid:
$$ t_{pc} = \frac{\displaystyle \sum_{i=1}^n p_i^* \cdot t_i}{P^*} $$Time of streamflow centroid:
$$ t_{qc} = \frac{\displaystyle \sum_{i=1}^n q_i^* \cdot t_i}{Q^*} $$Centroid lag:
$$ T_{LC} = t_{qc} - t_{pc} $$Centroid lag-to-peak: $$ T_{LPC} = t_{pk} - t_{pc} $$
Time of concentration: $$ T_{LPC} \simeq 0.60 \cdot T_c $$
# pstar centroid
# time of the first (nonzero) rainfall data point
t0 = pstar[pstar != 0.0].index[0]
# time of the last (nonzero) rainfall data point
tf = pstar[pstar != 0.0].index[-1]
# duration of the rainfall event, in minutes
td = (tf-t0) / pd.Timedelta('1 min')
# make time array, add 2.5 minutes (half of dt)
time = np.arange(0, td+1, 5) + 2.5
# create pi array, only with relevant data (during rainfall duration)
pi = pstar.loc[(pstar.index >= t0) & (pstar.index <= tf)]
# time of precipitation centroid
t_pc = (pi * time).sum() / pi.sum()
# add initial time
t_pc = t0 + pd.Timedelta(minutes=t_pc)
t_pc
# qstar centroid
# time of the first (nonzero) discharge data point
t0 = qstar[qstar != 0.0].index[0]
# time of the last (nonzero) discharge data point
tf = qstar[pstar != 0.0].index[-1]
# duration of the discharge event, in minutes
td = (tf-t0) / pd.Timedelta('1 min')
# make time array, add 2.5 minutes (half of dt)
time = np.arange(0, td+1, 5) + 2.5
# create qi array, only with relevant data (during discharge duration)
qi = qstar.loc[(qstar.index >= t0) & (qstar.index <= tf)]
# time of discharge centroid
t_qc = (qi * time).sum() / qi.sum()
# add initial time
t_qc = t0 + pd.Timedelta(minutes=t_qc)
t_qc
# time of peak discharge
max_discharge = qstar.max()
t_pk = qstar[qstar == max_discharge].index[0]
# centroid lag
T_LC = t_qc - t_pc
# centroid lag-to-peak
T_LPC = t_pk - t_pc
# time of concentration
T_c = T_LPC / 0.60
print(f"T_LC = {T_LC}")
print(f"T_LPC = {T_LPC}")
print(f"T_c = {T_c}")