Thermal time

Contents

Thermal time#

[1]:

import os
import copy
import datetime

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = [5, 2]
from eocrops.inputs.meteoblue import WeatherDownload
from eocrops.climatools.format_data import WeatherPostprocess
from eocrops.climatools import resampling
from eocrops.tasks import curve_fitting
from eocrops.utils import base_functions as utils
from shapely.geometry import Point

/home/johann/anaconda3/envs/eo-crops/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

Load the file#

[12]:

# Define the path to get store the data
PATH_EXPERIMENT = os.path.join("../data", "experiments")
# Dates of interest
TIME_INTERVAL = ("2017-08-15", "2018-07-15")

YEAR = TIME_INTERVAL[-1].split("-")[0]

if not os.path.exists(PATH_EXPERIMENT):
    os.mkdir(PATH_EXPERIMENT)

[13]:

# Read the dataset
## Read the excel file
sentinel_data = pd.read_excel("../data/rapeseed_data_s1.xlsx")
sentinel_data = sentinel_data.sort_values(by=["ID", "band"])
## Define the features
features_data = [("DATA", f, f, "float32", 1) for f in sentinel_data["band"].unique()]

[14]:

## Define the meta data
### Get the informations regarding the location
meta_data = sentinel_data[["ID", "lon", "lat"]].drop_duplicates()
### Add in the metadata file the dates of interest in the time series
meta_data["timestamp"] = [TIME_INTERVAL]
### Convert the coordinates into Point
meta_data["geometry"] = [
    Point(x, y) for x, y in zip(meta_data["lon"], meta_data["lat"])
]
feature_vector = [(col_name, "string") for col_name in meta_data.columns]
np.save(file=f"{PATH_EXPERIMENT}/meta_data.npy", arr=meta_data.values)

/home/johann/anaconda3/envs/eo-crops/lib/python3.9/site-packages/pandas/core/dtypes/cast.py:128: ShapelyDeprecationWarning: The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.
  arr = construct_1d_object_array_from_listlike(values)

[45]:

## Define the 3D array from Sentinel-1 data
### Keep only band values
sentinel_bands = sentinel_data.drop(["ID", "lon", "lat", "band"], axis=1)
dates = sentinel_bands.columns
sentinel_values = sentinel_bands.values
### Reshape in 3D array
array_data = sentinel_values.reshape(
    sentinel_values.shape[0] // len(features_data),
    len(features_data),
    sentinel_values.shape[1],
)
array_data = np.moveaxis(array_data, 1, 2)

[49]:

# Compute RVI=4*VH/(VV+VH)
RVI = 4 * array_data[..., 0] / (array_data[..., 0] + array_data[..., 1])
plt.plot(RVI.flatten())

[49]:

[<matplotlib.lines.Line2D at 0x7f8c0c160a60>]

../../_images/eocrops_examples_Thermal_time_7_1.png

[47]:

### Check if the reshape worked well
np.alltrue(sentinel_data.iloc[0, :].values == array_data[0, :, 0])
### Plot the first observation
plt.plot(array_data[0, :, 0])
plt.show()

/tmp/ipykernel_45200/3585828634.py:2: DeprecationWarning: elementwise comparison failed; this will raise an error in the future.
  np.alltrue(sentinel_data.iloc[0, :].values == array_data[0, :, 0])

../../_images/eocrops_examples_Thermal_time_8_1.png

[48]:

## Get the day of the year of the samples
dates = [datetime.datetime.strptime(str(k), "%Y%m%d") for k in dates]
## Subset for the season of interest
### Usually for rapeseed, season start 15 August previous year
start_season = datetime.datetime.strptime(TIME_INTERVAL[0], "%Y-%m-%d")
### And harvest is up to mid-July
end_season = datetime.datetime.strptime(TIME_INTERVAL[1], "%Y-%m-%d")

subset_dates = [k for k in dates if k >= start_season and k <= end_season]
len(subset_dates)

[48]:

Check if we have a fixed number of days between acquisition dates#

[18]:

# Check the dates and resample the data over fixed periods
def days_since_first(dates):
    """Compute number of days since the start of the season"""
    first_date = dates[0]
    days_since = [(d - first_date).days for d in dates]
    return np.array(days_since)


def is_evenly_spaced(lst, step):
    """Check if day of the year are evenly spaced"""
    for i in range(1, len(lst)):
        if lst[i] - lst[i - 1] != step:
            print(lst[i])
            return False
    return True


# Day of the year since start of season (15th August)
subset_dates_doy = days_since_first(subset_dates)
array_data = array_data[:, : len(subset_dates_doy), :]
# Check if doy are evenly spaced, otherwise we need to resample
print(is_evenly_spaced(subset_dates_doy, subset_dates_doy[1] - subset_dates_doy[0]))

144
False

[19]:

# Resample the data since we do not have regular dates
curve_fit = curve_fitting.CurveFitting(
    range_doy=(subset_dates_doy[0], subset_dates_doy[-1])
)

new_dates_doy, array_resample = curve_fit.resample_ts(
    doy=subset_dates_doy,
    ts_mean_=array_data,
    resampling=6,  # Resample every 6 days
)
# Check the results : any changes
plt.plot(subset_dates_doy, array_data[0, :, 1])
plt.plot(new_dates_doy, array_resample[0, :, 1])
plt.show()

../../_images/eocrops_examples_Thermal_time_12_0.png

[37]:

# Smooth the signal
print(array_resample.shape)

array_resample_vh = curve_fit.fit_whitakker(array_resample[0, :], degree_smoothing=2)

# Check the results : any changes
plt.plot(array_resample[0, :, 0])
plt.plot(array_resample_vh[:, 0])
plt.legend(["Original", "Smoothed"])
plt.show()

(1, 56, 2)

../../_images/eocrops_examples_Thermal_time_13_1.png

[62]:

# Save the satellite data
np.save(file=f"{PATH_EXPERIMENT}/satellite_data.npy", arr=array_resample)

Extract weather data#

[ ]:

# Extract the weather data
## Download the data using meteoblue API
import nest_asyncio

nest_asyncio.apply()

pipeline_cehub = WeatherDownload(
    api_key="",  # Please put your meteoblue API here
    shapefile=meta_data,
    id_column="ID",  # column from the meta_data with the ID of the field
    timestamp_column="timestamp",  # timestamp of interest
)

query_base = [
    {"domain": "ERA5T", "gapFillDomain": "ERA5", "timeResolution": "daily", "codes": []}
]

query_sum = copy.deepcopy(query_base)
query_sum[0]["codes"].extend(
    [
        {
            "code": 731,
            "level": "2 m above gnd",
            "aggregation": "sum",
            "gddBase": 0,  # Tmin for rapeseed
            "gddLimit": 30,  # Tmax for rapeseed
        }
    ]
)
weather_data = pipeline_cehub.execute(query=query_sum)
weather_data.to_csv(f"{PATH_EXPERIMENT}/GDD_data_raw.csv", index=False)

[57]:

weather_data.head(2)

[57]:

	location	lat	lon	asl	variable	unit	level	timeResolution	aggregation	timestamp	value	69019A3B-92C5-4EA8-AA93-A7F1098B4BD3
1	1	45.50000	-0.50000	51.251	Growing Degree days daily max/min	GDDc	2 m above gnd	daily	none	20170815T0000	20.619999	69019A3B-92C5-4EA8-AA93-A7F1098B4BD3
2	1	45.50000	-0.50000	51.251	Growing Degree days daily max/min	GDDc	2 m above gnd	daily	none	20170816T0000	20.814999	69019A3B-92C5-4EA8-AA93-A7F1098B4BD3

[60]:

## Format the data to compute accumulated GDUs
weather_data = pd.read_csv(f"{PATH_EXPERIMENT}/GDD_data_raw.csv")

pipeline_refactor = WeatherPostprocess(
    shapefile=meta_data, id_column="ID", timestamp_column="timestamp"
)
daily_gdus = pipeline_refactor.execute(
    df_weather=weather_data, stat="sum", return_pivot=True
)
daily_gdus.to_csv(f"{PATH_EXPERIMENT}/GDD_data.csv", index=False)

/home/johann/anaconda3/envs/eo-crops/lib/python3.9/site-packages/eocrops/climatools/format_data.py:160: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

        >>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use

        >>> .groupby(..., group_keys=True)
  df[["period", "location"]]

Thermal time resampling#

[63]:

# Thermal time resampling
gdu = resampling.TempResampling(
    range_dates=TIME_INTERVAL,  #  extracted from weather data
    stop=TIME_INTERVAL[
        -1
    ],  # Stopping date for yield prediction model (useful for in season)
    id_column="ID",  # Ids of the observations, which is the saving path of EOPatch
    smooth=True,  # Apply smoothing using sg filter
    period_nas_rate=0.2,
)

# You must load those 3 files that you did in the step data_loader to do the resampling (prepared before)
## Meta data
meta_data = gdu.load_meta_data(
    feature_vector=feature_vector, filepath=f"{PATH_EXPERIMENT}/meta_data.npy"
)
## Weather data
weather_data = gdu.load_weather_data(filepath=f"{PATH_EXPERIMENT}/GDD_data.csv")
## Satellite data
x_s1 = gdu.load_sat_data(filepath=f"{PATH_EXPERIMENT}/satellite_data.npy")

[64]:

# Define the parameters for temporal resampling
kwargs_input = dict(
    stat="mean",  # aggregation over GDD periods (mean)
    thermal_time=True,  # resample over thermal time? otherwise, calendar time
    features_data=features_data,
    increment=120,  # Number of GDDs per period (if thermal_time=False, you can specify 10 for 10-day period)
    period_length=6,
)

# Prepare the output file with VH and VV resampled over thermal time
ds = pd.DataFrame()
for _, _, fname_, _, num in features_data:
    # Resample satellite data for a given fname (VV or VH)
    output, _ = gdu.resample_s2(**kwargs_input, fname=fname_)
    # Contenate all the bands
    ds = utils.concatenate_outputs(ds, output, fname_, id_column="ID")

VH
VV

/home/johann/anaconda3/envs/eo-crops/lib/python3.9/collections/__init__.py:431: RuntimeWarning: coroutine 'WeatherDownload._get_jobIDs_from_query' was never awaited
  __new__ = eval(code, namespace)
RuntimeWarning: Enable tracemalloc to get the object allocation traceback
/home/johann/anaconda3/envs/eo-crops/lib/python3.9/site-packages/eocrops/utils/base_functions.py:100: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
  ds = ds.append(output)

[65]:

ds.head()

[65]:

	ID	(VH, 0)	(VH, 1)	(VH, 2)	(VH, 3)	(VH, 4)	(VH, 5)	(VH, 6)	(VH, 7)	(VH, 8)	...	(VV, 26)	(VV, 27)	(VV, 28)	(VV, 29)	(VV, 30)	(VV, 31)	(VV, 32)	(VV, 33)	(VV, 34)	(VV, 35)
0	1	0.020873	0.015591	0.012079	0.011867	0.015831	0.015476	0.01725	0.024987	0.023762	...	0.119587	0.127084	0.140267	0.127745	0.100579	0.094846	0.078625	0.046503	0.024062	0.023308

1 rows × 73 columns

[66]:

# Check the results after thermal resampling
cols_vv = [k for k in ds.columns if "VV" in k]
cols_vh = [k for k in ds.columns if "VH" in k]
plt.plot(ds[cols_vv].iloc[0, :].values)
plt.plot(ds[cols_vh].iloc[0, :].values)
plt.legend(["VV", "VH"])
plt.show()

../../_images/eocrops_examples_Thermal_time_23_0.png

[ ]: