Weather data#

[2]:
import os
import pandas as pd
from eocrops.inputs.meteoblue import WeatherDownload
from eocrops.climatools.format_data import WeatherPostprocess

import geopandas as gpd
from shapely.geometry import Point

Read the file#

[3]:
# Show the working directory: the relative CSV path below is resolved against it.
print(os.getcwd())
input_file = pd.read_csv("../data/burkina_dataframe.csv")

# Build the point geometries with geopandas (x = longitude, y = latitude) and hand
# them over through `geometry=`. Storing a Python list of shapely Points in a plain
# pandas column first appears to be what raises the ShapelyDeprecationWarning.
# NOTE(review): no CRS is attached, as before; the coordinates look like lon/lat
# degrees - confirm before setting one.
input_file = gpd.GeoDataFrame(
    input_file,
    geometry=gpd.points_from_xy(input_file["Longitude"], input_file["Latitude"]),
)

# The location identifier is handled as a string key from here on.
input_file["Id_location"] = input_file["Id_location"].astype(str)
# One (start, end) date pair per row, spanning the whole calendar year of the row.
input_file["timestamp"] = [
    (f"{year}-01-01", f"{year}-12-31") for year in input_file["Year"]
]

input_file.head()
C:\Users\s999379\OneDrive - Syngenta\Documents\git-repo\eo-crops\examples
C:\Users\s999379\Miniconda3\envs\eo-crops\lib\site-packages\pandas\core\dtypes\cast.py:128: ShapelyDeprecationWarning: The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.
  arr = construct_1d_object_array_from_listlike(values)
[3]:
Year Id_location Latitude Longitude geometry timestamp
0 2010 200026 14.0333 -0.0333 POINT (-0.03330 14.03330) (2010-01-01, 2010-12-31)
1 2010 200107 11.7333 -2.9167 POINT (-2.91670 11.73330) (2010-01-01, 2010-12-31)
2 2010 200140 10.3333 -3.1833 POINT (-3.18330 10.33330) (2010-01-01, 2010-12-31)
3 2010 200099 11.1667 -4.3000 POINT (-4.30000 11.16670) (2010-01-01, 2010-12-31)
4 2010 200089 12.0667 0.3500 POINT (0.35000 12.06670) (2010-01-01, 2010-12-31)

Download data using the meteoblue API#

Define the query#

[5]:
# Define your query (https://docs.meteoblue.com/en/weather-apis/dataset-api/dataset-api)
## Daily statistics to extract
AGGREGATION = "mean"

## Query of variables
# Each requested variable is a (code, level) pair; every one of them uses the same
# daily aggregation. Relative Humidity (code 52) is listed once only: the earlier
# version of this list requested it twice.
query = [
    {
        "domain": "ERA5",
        "gapFillDomain": "ERA5",
        "timeResolution": "daily",
        "codes": [
            {"code": code, "level": level, "aggregation": AGGREGATION}
            for code, level in (
                (52, "2 m above gnd"),  # Relative Humidity
                (11, "2 m above gnd"),  # air temperature (°C)
                (32, "2 m above gnd"),  # Wind Speed
                (256, "sfc"),  # Diffuse Shortwave Radiation
                (56, "2 m above gnd"),  # Vapor Pressure Deficit
                (260, "2 m above gnd"),  # FAO Reference Evapotranspiration
                (261, "sfc"),  # Evapotranspiration
            )
        ],
    }
]
[ ]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook.
# NOTE(review): presumably required because the downloader runs asyncio code while
# Jupyter already has a running loop - confirm against WeatherDownload.
nest_asyncio.apply()
## TODO : time interval subset by observations
# NOTE: `time_interval` is not passed to WeatherDownload or execute() below; the
# per-row "timestamp" column of `input_file` is what is handed over.
time_interval = ("2017-01-01", "2017-12-31")

pipeline_cehub = WeatherDownload(
    # Read the meteoblue API key from the METEOBLUE_API_KEY environment variable
    # rather than pasting it into the notebook; falls back to "" as before.
    api_key=os.environ.get("METEOBLUE_API_KEY", ""),
    shapefile=input_file,
    id_column="Id_location",
    timestamp_column="timestamp",
)

# Trailing ";" suppresses the cell's output.
df_output = pipeline_cehub.execute(query=query);
[9]:
# Preview the first five rows (the pandas default) of the downloaded table.
df_output.head()
[9]:
location lat lon asl variable unit level timeResolution aggregation timestamp value FD3C7D1A-5E1B-471B-9447-46EA81E18476
1 200026 13.91100 0.00000 294.786 Relative Humidity % 2 m above gnd daily mean 20100101T0000 19.291204 FD3C7D1A-5E1B-471B-9447-46EA81E18476
2 200026 13.91100 0.00000 294.786 Relative Humidity % 2 m above gnd daily mean 20100102T0000 17.51828 FD3C7D1A-5E1B-471B-9447-46EA81E18476
3 200026 13.91100 0.00000 294.786 Relative Humidity % 2 m above gnd daily mean 20100103T0000 17.148224 FD3C7D1A-5E1B-471B-9447-46EA81E18476
4 200026 13.91100 0.00000 294.786 Relative Humidity % 2 m above gnd daily mean 20100104T0000 18.082785 FD3C7D1A-5E1B-471B-9447-46EA81E18476
5 200026 13.91100 0.00000 294.786 Relative Humidity % 2 m above gnd daily mean 20100105T0000 19.80562 FD3C7D1A-5E1B-471B-9447-46EA81E18476
[10]:
# Save the downloaded weather table to disk, without the index column.
df_output.to_csv(path_or_buf="../data/weather_burkina.csv", index=False)

Format the data for machine learning purposes.#

[11]:
# Statistic handed to the post-processing step as `stat`.
AGGREGATION = "mean"
# Reload the weather table saved to disk.
df_output = pd.read_csv(filepath_or_buffer="../data/weather_burkina.csv")

# The last element of resample_range is the resampling period in days; a larger
# value gives fixed periods of days (e.g. every 8 days).
# NOTE(review): the first two elements look like month-day suffixes for the start
# and end of the window - confirm against WeatherPostprocess.
pipeline_refactor = WeatherPostprocess(
    shapefile=input_file,
    id_column="Id_location",
    resample_range=("-01-01", "-12-31", 1),
)

df_mean = pipeline_refactor.execute(
    df_weather=df_output,
    stat=AGGREGATION,
    return_pivot=False,
)
C:\Users\s999379\Miniconda3\envs\eo-crops\lib\site-packages\eocrops\climatools\format_data.py:160: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

        >>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use

        >>> .groupby(..., group_keys=True)
  df[["period", "location"]]
[16]:
# "period" is the number of days counted from the beginning of the query.
## Names of the variables present in the table
print(df_mean["variable"].unique())
## Preview of the ready-to-use dataframe
df_mean.head()
['Diffuse Shortwave Radiation' 'Evapotranspiration'
 'FAO Reference Evapotranspiration' 'Relative Humidity' 'Temperature'
 'Vapor Pressure Deficit' 'Wind Speed']
[16]:
variable period Id_location Year mean_value
0 Diffuse Shortwave Radiation 0.0 200001 2010 56.000000
32 Diffuse Shortwave Radiation 1.0 200001 2010 54.916668
64 Diffuse Shortwave Radiation 2.0 200001 2010 55.958332
96 Diffuse Shortwave Radiation 3.0 200001 2010 56.125000
128 Diffuse Shortwave Radiation 4.0 200001 2010 55.958332
[13]:
# Visual check: temperature over the periods, one line per location.
import seaborn as sns

df_temperature = df_mean.loc[df_mean["variable"] == "Temperature"]
sns.lineplot(
    data=df_temperature,
    x="period",
    y=AGGREGATION + "_value",
    hue="Id_location",
)
[13]:
<Axes: xlabel='period', ylabel='mean_value'>
../../_images/eocrops_examples_Weather_data_13_1.png
[ ]: