Weather data

Contents

Weather data#

[2]:

import os
import pandas as pd
from eocrops.inputs.meteoblue import WeatherDownload
from eocrops.climatools.format_data import WeatherPostprocess

import geopandas as gpd
from shapely.geometry import Point

Read the file#

[3]:

# Load the sample locations (one row per location and year) from the CSV file.
locations_raw = pd.read_csv("../data/burkina_dataframe.csv")

# Build the point geometries in one vectorised call instead of storing a Python
# list of shapely Points in an object column first; that intermediate step is
# what raised the ShapelyDeprecationWarning.
# NOTE(review): the coordinates look like WGS84 lon/lat - consider passing
# crs="EPSG:4326" if the downstream pipelines expect a CRS; confirm first.
input_file = gpd.GeoDataFrame(
    locations_raw,
    geometry=gpd.points_from_xy(
        locations_raw["Longitude"], locations_raw["Latitude"]
    ),
)

# Location identifiers are handled as strings in the rest of the notebook.
input_file["Id_location"] = input_file["Id_location"].astype(str)

# One (start, end) date pair per row, covering the full calendar year of "Year".
input_file["timestamp"] = [
    (f"{year}-01-01", f"{year}-12-31") for year in input_file["Year"]
]

input_file.head()

C:\Users\s999379\OneDrive - Syngenta\Documents\git-repo\eo-crops\examples

C:\Users\s999379\Miniconda3\envs\eo-crops\lib\site-packages\pandas\core\dtypes\cast.py:128: ShapelyDeprecationWarning: The array interface is deprecated and will no longer work in Shapely 2.0. Convert the '.coords' to a numpy array instead.
  arr = construct_1d_object_array_from_listlike(values)

[3]:

	Year	Id_location	Latitude	Longitude	geometry	timestamp
0	2010	200026	14.0333	-0.0333	POINT (-0.03330 14.03330)	(2010-01-01, 2010-12-31)
1	2010	200107	11.7333	-2.9167	POINT (-2.91670 11.73330)	(2010-01-01, 2010-12-31)
2	2010	200140	10.3333	-3.1833	POINT (-3.18330 10.33330)	(2010-01-01, 2010-12-31)
3	2010	200099	11.1667	-4.3000	POINT (-4.30000 11.16670)	(2010-01-01, 2010-12-31)
4	2010	200089	12.0667	0.3500	POINT (0.35000 12.06670)	(2010-01-01, 2010-12-31)

Download data using the meteoblue API#

Define the query#

[5]:

# Define your query (https://docs.meteoblue.com/en/weather-apis/dataset-api/dataset-api)
# Daily statistic applied to every requested variable.
AGGREGATION = "mean"

# Variables to request. Each variable code appears exactly once: the original
# list asked for code 52 (Relative Humidity) twice, which only duplicated the
# request without adding information.
query = [
    {
        "domain": "ERA5",
        "gapFillDomain": "ERA5",
        "timeResolution": "daily",
        "codes": [
            {
                "code": 52,
                "level": "2 m above gnd",
                "aggregation": AGGREGATION,
            },  # Relative Humidity
            {
                "code": 11,
                "level": "2 m above gnd",
                "aggregation": AGGREGATION,
            },  # Air temperature (°C)
            {
                "code": 32,
                "level": "2 m above gnd",
                "aggregation": AGGREGATION,
            },  # Wind Speed
            {
                "code": 256,
                "level": "sfc",
                "aggregation": AGGREGATION,
            },  # Diffuse Shortwave Radiation
            {
                "code": 56,
                "level": "2 m above gnd",
                "aggregation": AGGREGATION,
            },  # Vapor Pressure Deficit
            {
                "code": 260,
                "level": "2 m above gnd",
                "aggregation": AGGREGATION,
            },  # FAO Reference Evapotranspiration
            {
                "code": 261,
                "level": "sfc",
                "aggregation": AGGREGATION,
            },  # Evapotranspiration
        ],
    }
]

[ ]:

import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook.
# NOTE(review): presumably needed because WeatherDownload issues its requests
# asynchronously - confirm against the eocrops implementation.
nest_asyncio.apply()

# TODO: subset the time interval by observations. The queried period is taken
# per row from the "timestamp" column of `input_file`, so no global interval
# is defined here.

pipeline_cehub = WeatherDownload(
    # Read the meteoblue API key from the environment instead of hard-coding
    # it in the notebook: export METEOBLUE_API_KEY before starting Jupyter.
    api_key=os.environ.get("METEOBLUE_API_KEY", ""),
    shapefile=input_file,
    id_column="Id_location",
    timestamp_column="timestamp",
)

df_output = pipeline_cehub.execute(query=query)

[9]:

# Preview the first rows of the weather table returned by the download step
# (one row per location, variable and timestamp).
df_output.head()

[9]:

	location	lat	lon	asl	variable	unit	level	timeResolution	aggregation	timestamp	value	FD3C7D1A-5E1B-471B-9447-46EA81E18476
1	200026	13.91100	0.00000	294.786	Relative Humidity	%	2 m above gnd	daily	mean	20100101T0000	19.291204	FD3C7D1A-5E1B-471B-9447-46EA81E18476
2	200026	13.91100	0.00000	294.786	Relative Humidity	%	2 m above gnd	daily	mean	20100102T0000	17.51828	FD3C7D1A-5E1B-471B-9447-46EA81E18476
3	200026	13.91100	0.00000	294.786	Relative Humidity	%	2 m above gnd	daily	mean	20100103T0000	17.148224	FD3C7D1A-5E1B-471B-9447-46EA81E18476
4	200026	13.91100	0.00000	294.786	Relative Humidity	%	2 m above gnd	daily	mean	20100104T0000	18.082785	FD3C7D1A-5E1B-471B-9447-46EA81E18476
5	200026	13.91100	0.00000	294.786	Relative Humidity	%	2 m above gnd	daily	mean	20100105T0000	19.80562	FD3C7D1A-5E1B-471B-9447-46EA81E18476

[10]:

# Save the downloaded weather table so the formatting step can reload it from
# disk without querying the API again.
df_output.to_csv(path_or_buf="../data/weather_burkina.csv", index=False)

Format the data for machine learning purposes.#

[11]:

# Statistic used both for the post-processing and for the name of the
# resulting value column.
AGGREGATION = "mean"

# Reload the weather table saved by the download step.
# NOTE(review): after the CSV round-trip `location` is probably parsed as an
# integer while `input_file["Id_location"]` is a string - confirm that
# WeatherPostprocess reconciles the two.
df_output = pd.read_csv("../data/weather_burkina.csv")

# Third element is the resampling step in days: 1 keeps daily values, a larger
# value (e.g. 8) resamples over fixed periods of that many days.
# NOTE(review): the first two elements look like month-day suffixes that bound
# the period within each year - confirm against WeatherPostprocess.
resample_range = ("-01-01", "-12-31", 1)

pipeline_refactor = WeatherPostprocess(
    shapefile=input_file,
    id_column="Id_location",
    resample_range=resample_range,
)

df_mean = pipeline_refactor.execute(
    df_weather=df_output,
    stat=AGGREGATION,
    return_pivot=False,
)

C:\Users\s999379\Miniconda3\envs\eo-crops\lib\site-packages\eocrops\climatools\format_data.py:160: FutureWarning: Not prepending group keys to the result index of transform-like apply. In the future, the group keys will be included in the index, regardless of whether the applied function returns a like-indexed object.
To preserve the previous behavior, use

        >>> .groupby(..., group_keys=False)

To adopt the future behavior and silence this warning, use

        >>> .groupby(..., group_keys=True)
  df[["period", "location"]]

[16]:

# "period" is the number of days elapsed since the beginning of the query.
# List the weather variables available in the formatted table.
print(df_mean["variable"].unique())
# Preview the ready-to-use dataframe.
df_mean.head()

['Diffuse Shortwave Radiation' 'Evapotranspiration'
 'FAO Reference Evapotranspiration' 'Relative Humidity' 'Temperature'
 'Vapor Pressure Deficit' 'Wind Speed']

[16]:

	variable	period	Id_location	Year	mean_value
0	Diffuse Shortwave Radiation	0.0	200001	2010	56.000000
32	Diffuse Shortwave Radiation	1.0	200001	2010	54.916668
64	Diffuse Shortwave Radiation	2.0	200001	2010	55.958332
96	Diffuse Shortwave Radiation	3.0	200001	2010	56.125000
128	Diffuse Shortwave Radiation	4.0	200001	2010	55.958332

[13]:

# Check the results: plot the aggregated temperature of every location
# against the period index.
import seaborn as sns

df_temperature = df_mean[df_mean["variable"] == "Temperature"]
ax = sns.lineplot(
    x="period", y=f"{AGGREGATION}_value", hue="Id_location", data=df_temperature
)
# Label the figure so it can be read on its own; the trailing semicolon keeps
# the cell from echoing the return value of `ax.set`.
ax.set(
    title="Temperature per location",
    xlabel="period (days since the beginning of the query)",
    ylabel=f"{AGGREGATION} temperature",
);

[13]:

<Axes: xlabel='period', ylabel='mean_value'>

../../_images/eocrops_examples_Weather_data_13_1.png

[ ]: