import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()  # datetime converter for a matplotlib
import seaborn as sns
sns.set(style="ticks", font_scale=1.5)
df = pd.read_csv('bet-dagan-3h.csv', encoding = 'unicode_escape', na_values=["-"])
# find out what hebrew gibberish means: http://www.pixiesoft.com/flip/
name_conversion_dictionary = {"ùí úçðä": "station name",
                        "îñôø úçðä": "station number",
                        "úàøéê": "Date",
                        "ùòä-LST": "LST time",
                        "èîôøèåøä(C°)": "T",
                        "èîôøèåøä ìçä(C°)": "wet-bulb temperature (°C)",
                        "èîôøèåøú ð÷åãú äèì(C°)": "dew_point_T",
                        "ìçåú éçñéú(%)": "relative humidity (%)",
                        "îäéøåú äøåç(m/s)": "wind_speed",
                        "ëéååï äøåç(îòìåú)": "wind direction (degrees)",
                        "ìçõ áâåáä äúçðä(hPa)": "Pressure",
                        "ìçõ áâåáä ôðé äéí(hPa)": "pressure at sea level (hPa)",
                        """äúàãåú éåîéú îâéâéú ñåâ à'(î"î)""": "pan evaporation (mm)",
                        "ñåâ ÷øéðä()": "radiation type",
                       }
# T = temperature (°C)
# dew_point_T = dew point temperature (°C)
# wind_speed = wind speed (m/s)
# Pressure = pressure at station height (hPa)
df = df.rename(columns=name_conversion_dictionary)
df['Date'] = pd.to_datetime(df['Date'], dayfirst=True)
df = df.set_index('Date')
df
station name station number LST time T wet-bulb temperature (°C) dew_point_T relative humidity (%) wind_speed wind direction (degrees) Pressure pressure at sea level (hPa) ëîåú òððéí ëåììú(÷åã) ëîåú òððéí ðîåëéí(÷åã) âåáä áñéñ òððéí ðîåëéí(÷åã) ñåâ äòððéí äðîåëéí(÷åã) ñåâ äòððéí äáéðåðééí(÷åã) ñåâ äòððéí äâáåäéí(÷åã) îæâ àååéø ðåëçé(÷åã) îæâ àååéø ùçìó(÷åã) øàåú àô÷éú(÷åã)
Date
2020-01-01 áéú ãâï ... 2523 02:00 7.9 7.2 6.4 90 1.7 117.0 1015.0 1018.8 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2020-01-01 áéú ãâï ... 2523 05:00 7.5 7.0 6.4 93 1.2 116.0 1014.3 1018.1 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2020-01-01 áéú ãâï ... 2523 08:00 8.6 8.3 8.0 96 1.1 107.0 1014.4 1018.2 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2020-01-01 áéú ãâï ... 2523 11:00 15.9 13.1 10.6 71 2.4 196.0 1013.7 1017.4 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2020-01-01 áéú ãâï ... 2523 14:00 18.1 14.0 10.4 61 2.8 264.0 1011.6 1015.3 NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-01-31 áéú ãâï ... 2523 11:00 19.0 13.7 8.9 52 5.6 235.0 1013.6 1017.3 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2021-01-31 áéú ãâï ... 2523 14:00 19.2 14.7 11.0 59 4.6 252.0 1013.0 1016.7 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2021-01-31 áéú ãâï ... 2523 17:00 18.2 14.8 12.2 68 0.8 203.0 1013.3 1017.0 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2021-01-31 áéú ãâï ... 2523 20:00 13.1 12.3 11.7 91 1.2 79.0 1014.4 1018.2 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2021-01-31 áéú ãâï ... 2523 23:00 10.8 10.6 10.3 97 1.7 111.0 1015.1 1018.9 NaN NaN NaN NaN NaN NaN NaN NaN NaN

3172 rows × 20 columns

df2 = pd.read_csv('bet-dagan-day-gigit.csv', encoding = 'unicode_escape', na_values=["-"])
df2 = df2.rename(columns=name_conversion_dictionary)
df2['Date'] = pd.to_datetime(df2['Date'], dayfirst=True)
df2 = df2.set_index('Date')
df2
station name station number èîôøèåøú î÷ñéîåí(C°) èîôøèåøú îéðéîåí(C°) èîôøèåøú îéðéîåí ìéã ä÷ø÷ò(C°) îùê æäéøú ùîù(ã÷åú) pan evaporation (mm) ÷åã äúàãåú éåîéú()
Date
2020-01-01 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN 0.8 0.0
2020-01-02 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN NaN NaN
2020-01-03 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN NaN NaN
2020-01-04 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN NaN NaN
2020-01-05 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN 2.4 0.0
... ... ... ... ... ... ... ... ...
2021-01-27 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN 2.5 0.0
2021-01-28 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN 1.2 0.0
2021-01-29 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN NaN NaN
2021-01-30 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN NaN NaN
2021-01-31 áéú ãâï îàåéùú ... 2520 NaN NaN NaN NaN 2.6 0.0

397 rows × 8 columns

df3 = pd.read_csv('bet-dagan-krina.csv', encoding = 'unicode_escape', na_values=["-"])
df3 = df3.rename(columns=name_conversion_dictionary)
df3['Date'] = pd.to_datetime(df3['Date'], dayfirst=True)
df3 = df3.set_index('Date')
df3 = df3.replace({"éùéøä": "direct",
                   "îôåæøú": "diffuse",
                   "âìåáàìéú": "global"})
df3['daily_radiation_MJ_per_m2_per_day'] = df3.iloc[:, 3:].sum(axis=1)/1000
df_radiation = df3.loc[df3["radiation type"] == "global", "daily_radiation_MJ_per_m2_per_day"].to_frame()
df_radiation
daily_radiation_MJ_per_m2_per_day
Date
2020-01-01 10.0296
2020-01-02 4.3128
2020-01-03 11.6748
2020-01-04 1.6452
2020-01-05 6.8544
... ...
2021-01-27 12.2652
2021-01-28 7.1640
2021-01-29 7.2936
2021-01-30 9.3276
2021-01-31 13.5468

396 rows × 1 columns

Review of methods

There are a variety of ways to estimate evaporative flux in nature. The following table categorizes each method based on the data that must be acquired to apply it:

These methods also vary in the timescales in which they are relevant, typically in correlation with the variety of data needed:

  • Thornthwaite and SCS Blaney-Criddle: monthly or seasonal estimations (minimal data)
  • Jensen-Haise: 5-day estimates (good enough timescale and data for irrigation scheduling)
  • Penman: daily estimates
  • Penman-Monteith: hourly estimates (requires a lot of data)

Part 1: Thornthwaite estimation

The Thornthwaite estimation is an entirely empirical model based on mean monthly temperature, alone.

$$ \begin{equation} E_p = 16\left[ \frac{10\,T_{avg}}{I} \right]^a, \end{equation} $$

where $$ \begin{equation} I = \sum_{i=1}^{12} \left[ \frac{T_{avg}}{5} \right]^{1.514}, \end{equation} $$ and $$ \begin{align} a &= 6.75\times 10^{-7}I^3 \\ &+ 7.71\times 10^{-5}I^2 \nonumber\\ &+ 1.792\times 10^{-2}I \nonumber\\ &+ 0.49239 \nonumber \end{align} $$

  • $E_p$ is the monthly potential ET (mm)
  • $T_{avg}$ is the mean monthly temperature in °C
  • $I$ is a heat index
  • $a$ is a location-dependent coefficient

In Part 1, we will start with plotting evaporation through the Thornthwaite model.

  • Download csv weather file from campus site or elsewhere
  • Extract the mean monthly temperatures from your data (your choice: the mean of monthly averages for multiple years, or the mean temperature for a single month of a given year from the average of daily temperatures in that month)
  • Incorporate mean monthly temperature values into the Thornthwaite model

Prepare your Tavg data for the Thornthwaite equation. Below, daily temperature data from March 1st, 2020, to February 28th, 2021, is indexed and organized into monthly means. The data source provided both Tmax and Tmin for each day, which is far more descriptive than the data that may be found from other sources. In this case, the daily then monthly mean was taken.

# note: dataframe has unnecessary rows removed. Issues previously with non-ascii coding, could not find "Date"

# new column of the average temperature, based on daily min/max
# df_th['Tmean'] = (df_th['Temp'] + df_th['Temp1'])/2  

# monthly data
df_th = (df['T'].resample('MS')  # MS assigns mean to first day in the month
                .mean()
                .to_frame()
        )
# we now add 14 days to the index, so that all monthly data is in the middle of the month
df_th.index = df_th.index + pd.DateOffset(days=14)
df_th
T
Date
2020-01-15 12.484274
2020-02-15 14.046983
2020-03-15 16.439113
2020-04-15 18.512500
2020-05-15 23.166532
2020-06-15 24.600000
2020-07-15 27.353226
2020-08-15 28.090323
2020-09-15 28.462500
2020-10-15 25.120161
2020-11-15 19.308475
2020-12-15 15.916129
2021-01-15 14.123790
# Preparing "I" for the Thornthwaite equation
I = np.sum( (df_th['T']/5)**(1.514) )

# Preparing "a" for the Thornthwaite equation
a = 6.75e-7 * I**3 - 7.71e-5 * I**2 + 1.792e-2 * I + 0.49239

# The final Thornthwaite model for monthly potential ET (mm)
df_th['Ep'] = 16*((10*df_th['T']/I)**a)
df_th
T Ep
Date
2020-01-15 12.484274 20.163427
2020-02-15 14.046983 27.179636
2020-03-15 16.439113 40.472053
2020-04-15 18.512500 54.671821
2020-05-15 23.166532 96.461219
2020-06-15 24.600000 112.296873
2020-07-15 27.353226 146.898516
2020-08-15 28.090323 157.128632
2020-09-15 28.462500 162.453109
2020-10-15 25.120161 118.406386
2020-11-15 19.308475 60.820862
2020-12-15 15.916129 37.291178
2021-01-15 14.123790 27.557481
fig, ax = plt.subplots(1, figsize=(10,7))
ax.plot(df_th['Ep'])
ax.set(xlabel="date",
       ylabel="evaporation (mm)",
       title="Thornthwaite evaporation");

Part 2: Penman

The Penman model is almost entirely a theory based formula for predicting evaporative flux. It can run on a much finer timescale, and requires a much wider variety of data than most models. In addition to temperature, the Penman functions on measurements of radiation, wind speed, elevation above sea level, vapour-pressure deficit, and heat flux density to the ground.

$$ \begin{equation} E_{tp} = \frac{\Delta}{\Delta+\gamma}Q_{ne}+ \frac{\gamma}{\Delta+\gamma}E_A \end{equation} $$

$Q_n$ is the available energy flux density:

$$ \begin{equation} Q_n = R_n - G, \end{equation} $$

and $E_A$ is the drying power of the air:

$$ \begin{equation} E_A = f_e(u)(e_s - e_d). \end{equation} $$

The various components of the equations above are:

$$ \begin{equation} \Delta = 0.200 \cdot (0.00738\,T + 0.8072)^7 - 0.000116 \end{equation} $$

$$ \begin{equation} \gamma = \frac{c_p\, P}{0.622 \lambda} \end{equation} $$

$$ \begin{equation} P = 101.3-0.01055 H \end{equation} $$

$$ \begin{equation} \lambda = 2.501 - 2.361\times 10^{-3}\,T \end{equation} $$

$$ \begin{equation} f_e(u) = 1.0 + 0.53\, u_2 \end{equation} $$

$$ \begin{equation} G = 4.2\frac{T_{i+1}-T_{i-1}}{\Delta t} \end{equation} $$

$$ \begin{equation} e_s = \exp \left[ \frac{16.78\, T -116.9}{T+237.3} \right] \end{equation} $$

$$ \begin{equation} e_d = e_s \frac{RH}{100} \end{equation} $$ where $\Delta t$ is the time in days between midpoints of time periods $i+1$ and $i−1$, and $T$ is the air temperature (°C).

  • $\Delta$: slope of the saturation water vapor pressure curve (kPa °C$^{-1}$)
  • $\gamma$: psychrometric constant (kPA °C$^{-1}$)
  • $c_p=0.001013$: specific heat of water at constant pressure (kJ kg$^{-1}$ °C$^{-1}$)
  • $P$: atmospheric pressure (kPa)
  • $H$: elevation above sea level (m)
  • $\lambda$: latent heat of vaporization (MJ kg$^{-1}$)
  • $R_n$: net radiation (MJ m$^{-2} d^{-1}$)
  • $G$: heat flux density to the ground (MJ m$^{-2} d^{-1}$)
  • $u_{2}$: wind speed measured 2 m above ground (m s$^{-1}$)
  • $e_{s} - e_{d}$: vapor pressure deficit (kPa)
  • $e_{s}$: saturation vapor pressure (kPa)
  • $e_{d}$: actual vapor pressure (kPa)
# note: na_values=["-"] necessary to replace dashes in csv with none value that is ignored when assessing daily mean.
# df_pen = pd.read_csv('Penman_data_2020_beit_dagan.csv', encoding = 'unicode_escape', na_values=["-"])

# note: now we have hourly data, so the index will have to compound daily values. Our data source, however, gave a 
# single temperature value, so the mean will not involve the average of daily Tmax and Tmin.

# df_pen['Date'] = pd.to_datetime(df_pen['Date'], dayfirst=True)
# df_pen = df_pen.set_index('Date')

# Resampling hourly data over same day and taking mean, to obtain daily averages
df_pen = (df['T'].resample('D')
                 .mean()
                 .to_frame()
         )
df_pen['dew_point'] = (df['dew_point_T'].resample('D')
                                        .mean()
                      )
df_pen['u'] = (df['wind_speed'].resample('D')
                               .mean()
              ) 
df_pen['P'] = (df['Pressure'].resample('D')
                             .mean()
              )/10
df_pen
T dew_point u P
Date
2020-01-01 12.3625 9.0625 1.5250 101.30875
2020-01-02 11.9750 9.8250 1.9250 101.20125
2020-01-03 13.0500 4.9750 5.1750 101.37125
2020-01-04 10.8625 6.6875 5.5625 101.15500
2020-01-05 12.9375 9.2125 4.5625 101.23625
... ... ... ... ...
2021-01-27 13.8125 8.2375 1.8875 100.83750
2021-01-28 14.4000 10.2250 3.5250 101.12750
2021-01-29 12.3500 7.9125 5.0250 101.22125
2021-01-30 12.9625 7.6500 4.4250 101.49500
2021-01-31 15.0625 8.3125 3.7500 101.32500

397 rows × 4 columns

# With Tavg for every day of the year, we can now solve for daily latent heat of vaporization, the slope of the 
# saturation-vapour pressure-temperature curve, and the heat flux density to the ground (Eq 5, 6, and 9)

def lambda_latent_heat(T):
    """daily latent heat of vaporization (MJ/kg)"""
    return 2.501 - 2.361e-3*T

def Delta(T):
    """slope of saturation-vapor curve (kPa/°C)"""
    return 0.2000*(0.00738*T + 0.8072)**7 - 0.000116

def G(T):
    """heat flux density to the ground, G (MJ/m2/d)"""
    return 4.2*np.gradient(T.values)

cp = 0.001013
df_pen['lambda'] = lambda_latent_heat(df_pen['T'])
df_pen['Delta'] = Delta(df_pen['T'])
df_pen['G'] = G(df_pen['T'])
df_pen['gamma'] = (cp*df_pen['P'])/(0.622*df_pen['lambda'])
df_pen['f_wind'] = 1.0 + 0.53 * df_pen['u']
df_pen
T dew_point u P lambda Delta G gamma f_wind
Date
2020-01-01 12.3625 9.0625 1.5250 101.30875 2.471812 0.094385 -1.62750 0.066750 1.808250
2020-01-02 11.9750 9.8250 1.9250 101.20125 2.472727 0.092300 1.44375 0.066654 2.020250
2020-01-03 13.0500 4.9750 5.1750 101.37125 2.470189 0.098185 -2.33625 0.066835 3.742750
2020-01-04 10.8625 6.6875 5.5625 101.15500 2.475354 0.086530 -0.23625 0.066553 3.948125
2020-01-05 12.9375 9.2125 4.5625 101.23625 2.470455 0.097554 4.25250 0.066739 3.418125
... ... ... ... ... ... ... ... ... ...
2021-01-27 13.8125 8.2375 1.8875 100.83750 2.468389 0.102551 4.77750 0.066532 2.000375
2021-01-28 14.4000 10.2250 3.5250 101.12750 2.467002 0.106028 -3.07125 0.066760 2.868250
2021-01-29 12.3500 7.9125 5.0250 101.22125 2.471842 0.094317 -3.01875 0.066691 3.663250
2021-01-30 12.9625 7.6500 4.4250 101.49500 2.470396 0.097694 5.69625 0.066911 3.345250
2021-01-31 15.0625 8.3125 3.7500 101.32500 2.465437 0.110070 8.82000 0.066933 2.987500

397 rows × 9 columns

# Radiation: Rn from Rso, Rs, Rbo, and Rb. [can provide equations for these if needed]

# Rso: mean solar radiation from a cloudless sky (based on latitude)
# MJ/m2/d
Rso_monthly = np.array([17.46, 21.65, 25.96, 29.85,
                        32.11, 33.20, 32.66, 30.44,
                        26.67, 22.48, 18.30, 16.04])
# Rs: solar radiation received on the earth's surface (based on sensors)
# Rs originally in kWh m-2 d-1; *3600 for hours, 10^-3 to convert to MJ m-2 d-1
# Rs_monthly = 3.6 * np.array([3.27, 3.68, 4.13, 5.31,
#                              6.09, 7.55, 7.06, 6.76,
#                              5.82, 4.93, 4.19, 3.58])
# Rs_monthly = np.multiply(Rs_kWh, 3.600)    
# create empty columns
df_pen["Rso_monthly"] = ""

df_pen["Rs"] = df_radiation["daily_radiation_MJ_per_m2_per_day"]

# every day in the month will have the same values for Rs, Rso
# syntax: df.loc[row_indices, column_names]
# df[df.index.month == 1] returns only the rows with January dates, etc...
for i in range(12):
#     df_pen.loc[df_pen.index.month==(i+1), "Rs_monthly"] = Rs_monthly[i]
    df_pen.loc[df_pen.index.month==(i+1), "Rso_monthly"] = Rso_monthly[i]
df_pen
T dew_point u P lambda Delta G gamma f_wind Rso_monthly Rs
Date
2020-01-01 12.3625 9.0625 1.5250 101.30875 2.471812 0.094385 -1.62750 0.066750 1.808250 17.46 10.0296
2020-01-02 11.9750 9.8250 1.9250 101.20125 2.472727 0.092300 1.44375 0.066654 2.020250 17.46 4.3128
2020-01-03 13.0500 4.9750 5.1750 101.37125 2.470189 0.098185 -2.33625 0.066835 3.742750 17.46 11.6748
2020-01-04 10.8625 6.6875 5.5625 101.15500 2.475354 0.086530 -0.23625 0.066553 3.948125 17.46 1.6452
2020-01-05 12.9375 9.2125 4.5625 101.23625 2.470455 0.097554 4.25250 0.066739 3.418125 17.46 6.8544
... ... ... ... ... ... ... ... ... ... ... ...
2021-01-27 13.8125 8.2375 1.8875 100.83750 2.468389 0.102551 4.77750 0.066532 2.000375 17.46 12.2652
2021-01-28 14.4000 10.2250 3.5250 101.12750 2.467002 0.106028 -3.07125 0.066760 2.868250 17.46 7.1640
2021-01-29 12.3500 7.9125 5.0250 101.22125 2.471842 0.094317 -3.01875 0.066691 3.663250 17.46 7.2936
2021-01-30 12.9625 7.6500 4.4250 101.49500 2.470396 0.097694 5.69625 0.066911 3.345250 17.46 9.3276
2021-01-31 15.0625 8.3125 3.7500 101.32500 2.465437 0.110070 8.82000 0.066933 2.987500 17.46 13.5468

397 rows × 11 columns

from: Ward & Trimble, "Environmental Hydrology", 2nd Edition, page 99.

$R_n$ is net radiation, the balance between net short wave $R_s$ and the long wave $R_b$ components of the radiation: $$R_n = (1-\alpha)R_s\!\! \downarrow -R_b \!\! \uparrow,$$ where $\alpha$ (dimensionless) is the albedo.

$$R_b = \left( a\frac{R_s}{R_{so}+b} \right)R_{bo}$$

$$R_{bo} = \epsilon\, \sigma\, T^4_{Kelvin}$$ where $\sigma=4.903\times 10^{-9}$ MJ m$^{-2}$ d$^{-1}$ K$^{-4}$, and $\epsilon$ is net net emissivity: $$\epsilon=-0.02+0.261 \exp\left(-7.77\times10^{-4}T_{Celcius}\right)$$

# Stefan-Boltzmann constant
sigma = 4.903e-9
# emissivity = 1 - 0.261**(-7.77e-4 * df_pen['T'][0]**2)
emissivity = -0.02 + 0.261 * np.exp(-7.77e-4 * df_pen['T']**2)

# Rbo: net longwave radiation for clear skies, otherwise known as diffuse radiation or emitted radiation from the
# atmosphere - 'how hot is it?'
Rbo = emissivity*sigma*((df_pen['T']+273)**4)

# net outgoing long-wave radiation (note: Rs/Rso = proportion of how clear the day is)

# for humid areas, a=1.0 and b=0
# for arid areas, a=1.2 and b=-0.2
# for semihumid areas, a=1.1 and b=-0.1
a = 1.2
b = -0.2
Rb = (a*df_pen['Rs']/df_pen['Rso_monthly'] + b)*Rbo   

# α is the albedo, or short-wave reflectance (dimensionless)
# = 0.23 for most green crops with a full cover
# = 0.04 for fresh asphalt
# = 0.12 for worn-out asphalt
# = 0.55 for fresh concrete

alpha = 0.23

# net radiation
Rn = (1 - alpha) * df_pen['Rs'] - Rb   # (MJ/m2/d)
df_pen['Rn'] = Rn
df_pen
T dew_point u P lambda Delta G gamma f_wind Rso_monthly Rs Rn
Date
2020-01-01 12.3625 9.0625 1.5250 101.30875 2.471812 0.094385 -1.62750 0.066750 1.808250 17.46 10.0296 4.353657
2020-01-02 11.9750 9.8250 1.9250 101.20125 2.472727 0.092300 1.44375 0.066654 2.020250 17.46 4.3128 2.655308
2020-01-03 13.0500 4.9750 5.1750 101.37125 2.470189 0.098185 -2.33625 0.066835 3.742750 17.46 11.6748 4.863603
2020-01-04 10.8625 6.6875 5.5625 101.15500 2.475354 0.086530 -0.23625 0.066553 3.948125 17.46 1.6452 1.870445
2020-01-05 12.9375 9.2125 4.5625 101.23625 2.470455 0.097554 4.25250 0.066739 3.418125 17.46 6.8544 3.419377
... ... ... ... ... ... ... ... ... ... ... ... ...
2021-01-27 13.8125 8.2375 1.8875 100.83750 2.468389 0.102551 4.77750 0.066532 2.000375 17.46 12.2652 5.070152
2021-01-28 14.4000 10.2250 3.5250 101.12750 2.467002 0.106028 -3.07125 0.066760 2.868250 17.46 7.1640 3.539126
2021-01-29 12.3500 7.9125 5.0250 101.22125 2.471842 0.094317 -3.01875 0.066691 3.663250 17.46 7.2936 3.541485
2021-01-30 12.9625 7.6500 4.4250 101.49500 2.470396 0.097694 5.69625 0.066911 3.345250 17.46 9.3276 4.159035
2021-01-31 15.0625 8.3125 3.7500 101.32500 2.465437 0.110070 8.82000 0.066933 2.987500 17.46 13.5468 5.524102

397 rows × 12 columns

# vapor pressure deficit = VPD

def vp_sat(T):
    return np.exp((16.78*T - 116.9)/(T + 237.3))

def vp(T):
    """calculate vapour pressure from temperature"""
    return 6.11*10*(7.5*T)/(237.3 + T)    


df_pen['e'] = vp(df_pen['dew_point'])
df_pen['es'] = vp(df_pen['T'])
df_pen['VPD'] = df_pen['es'] - df_pen['e']
df_pen
T dew_point u P lambda Delta G gamma f_wind Rso_monthly Rs Rn e es VPD
Date
2020-01-01 12.3625 9.0625 1.5250 101.30875 2.471812 0.094385 -1.62750 0.066750 1.808250 17.46 10.0296 4.353657 16.856829 22.691095 5.834266
2020-01-02 11.9750 9.8250 1.9250 101.20125 2.472727 0.092300 1.44375 0.066654 2.020250 17.46 4.3128 2.655308 18.218741 22.014016 3.795275
2020-01-03 13.0500 4.9750 5.1750 101.37125 2.470189 0.098185 -2.33625 0.066835 3.742750 17.46 11.6748 4.863603 9.409942 23.887208 14.477266
2020-01-04 10.8625 6.6875 5.5625 101.15500 2.475354 0.086530 -0.23625 0.066553 3.948125 17.46 1.6452 1.870445 12.560262 20.058392 7.498130
2020-01-05 12.9375 9.2125 4.5625 101.23625 2.470455 0.097554 4.25250 0.066739 3.418125 17.46 6.8544 3.419377 17.125412 23.691930 6.566518
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2021-01-27 13.8125 8.2375 1.8875 100.83750 2.468389 0.102551 4.77750 0.066532 2.000375 17.46 12.2652 5.070152 15.373759 25.206145 9.832386
2021-01-28 14.4000 10.2250 3.5250 101.12750 2.467002 0.106028 -3.07125 0.066760 2.868250 17.46 7.1640 3.539126 18.929830 26.216925 7.287095
2021-01-29 12.3500 7.9125 5.0250 101.22125 2.471842 0.094317 -3.01875 0.066691 3.663250 17.46 7.2936 3.541485 14.786779 22.669287 7.882508
2021-01-30 12.9625 7.6500 4.4250 101.49500 2.470396 0.097694 5.69625 0.066911 3.345250 17.46 9.3276 4.159035 14.311543 23.735340 9.423797
2021-01-31 15.0625 8.3125 3.7500 101.32500 2.465437 0.110070 8.82000 0.066933 2.987500 17.46 13.5468 5.524102 15.508995 27.351095 11.842099

397 rows × 15 columns

# Check on data: magnitudes, trends, etc.

# plot variable to see its trend throughout the year
to_plot = df_pen['Rn']
plt.plot(to_plot)
plt.gcf().autofmt_xdate()
plt.ylim([0, to_plot.max()])

# can review how each variable fluctuates throughout the year.
# notes: latent heat: p. constant; slope: flat bell; psychro_c p. constant; Rn strong bell, right skewed; 
# G erratic with -'ve values and less change towards Rn peak; wind high in winter/spring, else low; total_v slight bell, but many irregularities 
(0.0, 18.82062281118612)
# Now that all variables have been defined, daily E_penman can be calculated.

def E_penman(df):
    T = df['T']
    Delta = df['Delta']
    gamma = df['gamma']
    Rn = df['Rn']
    G = df['G']
    EA = df['f_wind'] * df['VPD']
    return (Delta / (Delta + gamma))*(Rn - G) + (Delta / (Delta + gamma))*EA

# daily_data
df_pen['E_penman'] = E_penman(df_pen)
plt.plot(df_pen['E_penman'])
plt.gcf().autofmt_xdate()
df2['pan evaporation (mm)']
Date
2020-01-01    0.8
2020-01-02    NaN
2020-01-03    NaN
2020-01-04    NaN
2020-01-05    2.4
             ... 
2021-01-27    2.5
2021-01-28    1.2
2021-01-29    NaN
2021-01-30    NaN
2021-01-31    2.6
Name: pan evaporation (mm), Length: 397, dtype: float64
fig, ax = plt.subplots(1, 1, figsize=(10,7))
ax.plot(df_pen['E_penman'], color="tab:blue", label="Penman", linewidth=2)
ax.plot(df_th['Ep'], color="tab:orange", label="Thornthwaite", linewidth=2)
ax.plot(1*df2['pan evaporation (mm)'], color="tab:purple", label="pan", linewidth=2)


ax.set(xlabel="date",
       ylabel="evaporation (mm)")
ax.legend()

# Overall, in Penmnan, we see a trend of a somewhat right-skewed bell, with high variability in winter and spring. Ep predicted by
# the Thornthwaite model is largely skewed to the right, reaching a peak towards September, and Ep is a magnitude larger than 
# Etp. Since this model was empirically designed for the typical temperature conditions in north-eastern US, it is easy to see how 
# this over-simplification with T and regional irrelevance causes critical insight into the variability and trends in the 
# Mediterranean climate to be lost. 
<matplotlib.legend.Legend at 0x7ff9e9453e90>
fig, ax = plt.subplots(1, 1, figsize=(10,7))
ax.plot(df_pen['T'], color="tab:blue", label="Penman", linewidth=2)
ax.plot(df_th['T'], color="tab:orange", label="Thornthwaite", linewidth=2)
ax.set(xlabel="date",
       ylabel="temperature (°C)")
ax.legend()

# What do we see?

# From this simple temperature comparison, we can see the daily variability in the data compared to its monthly mean.
# It is evident that using less precise data means missing extreme daily fluctuations which may relate to more rare climate 
# events. For example, the large jump in the graph in May represents six days in May 2020 (16th-21st) when the recorded max
# temperatures reached above 38C, with five out of six reaching above 40C. This was indeed a historic heat wave that was 
# widely reported on, as daily consecutive temperatures in the center of Israel in March never reached this magnitude
# in the country's lifetime.

# These are the rare climate events that are linked to climate change that are only expected to become less 'rare';
# this means that our data analysis skills and modelling must become more sensitive to contend, which, ultimately, means 
# that we must know how to work with a LOT of data in an organized and comprehensible way!
<matplotlib.legend.Legend at 0x7ff9b8d3e510>
# # To view the month of May 2020
# start_date = "2020-5-1"
# end_date = "2020-5-31"

# after_start_date = df["date"] >= start_date
# before_end_date = df["date"] <= end_date
# between_two_dates = after_start_date & before_end_date
# filtered_dates = df.loc[between_two_dates]

# print(filtered_dates)
# Using Penman...
# Simple exercise: Plot rainfall data and compare rainfall input to daily predicted Etp. Further, plot their cumulative 
# functions.

# Tougher exercise: Collect data for multiple years (example, past decade), and plot each year's Etp and rainfall. Compare
# decade mean of each, and comment on the water balance throughout the decade.