Precipitation (IMD)

For estimating precipitation, the EQIPA platform uses high-resolution daily gridded rainfall data provided by the India Meteorological Department (IMD). The dataset spans a long period (1901–Present) and offers daily precipitation values across India at a 0.25° x 0.25° spatial resolution.

Dataset Overview

Source: IMD Daily Gridded Rainfall Data
Format: NetCDF (.nc)
Period of Use: 2018–2023 crop years
Spatial Resolution: 0.25° x 0.25°
Temporal Resolution: Daily
Output Format Used in EQIPA: Monthly GeoTIFF

📌 Data Processing Summary

Daily NetCDF files are downloaded from IMD.
Each file is converted to daily GeoTIFF rasters.
These daily rasters are aggregated to produce monthly precipitation maps.
Annual precipitation (per crop year) is computed by summing monthly rasters from June to May (e.g., June 2022 – May 2023).

Download Annual NetCDF from IMD

# pcp_imd_download.py
import requests
import os
import re
from tqdm import tqdm

# Create a folder to store downloaded files
download_folder = "imd_netcdf_files"
os.makedirs(download_folder, exist_ok=True)

firstyear = 2023
lastyear = 2024



# Define the URL
url = "https://www.imdpune.gov.in/cmpg/Griddata/RF25.php"

# Loop over the years for which you wish to download the netCDF files
for year in range(firstyear, lastyear + 1):  # Adjust the range as needed
    payload = {"RF25": str(year)}
    print(f"Downloading netCDF file for year {year} ...")

    try:
        # Post the request with the payload
        response = requests.post(url, data=payload, stream=True)
        response.raise_for_status()  # Raise an error for bad responses

        # Determine filename (using a default if not provided in headers)
        filename = f"RF25_{year}.nc"

        # Get total file size from headers (if available) for the progress bar
        total_size = int(response.headers.get('content-length', 0))
        progress_bar = tqdm(total=total_size, unit='B', unit_scale=True, desc=filename)

        output_path = os.path.join(download_folder, filename)
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    progress_bar.update(len(chunk))
        progress_bar.close()

        # Check if the download completed correctly
        if total_size != 0 and progress_bar.n != total_size:
            print(f"WARNING: Download size mismatch for {filename}")
        else:
            print(f"Downloaded {filename} successfully.")

    except requests.RequestException as e:
        print(f"Failed to download netCDF file for year {year}: {e}")

Convert NetCDF to Daily GeoTIFFs

The following Python script converts daily precipitation values from IMD NetCDF files to GeoTIFF format using xarray and rioxarray.

# pcp_imd_daily.py
# pcp_imd_daily.py
import xarray as xr
import rioxarray
import os
from osgeo import gdal

firstyear = 2023
lastyear = 2024

input_folder = "imd_netcdf_files"

output_folder = "pcp_imd_daily"
os.makedirs(output_folder, exist_ok=True)


for year in range(firstyear, lastyear + 1):
    # Construct the full path to the netCDF file using f-string and os.path.join
    input_nc = os.path.join(input_folder, f"RF25_{year}.nc")

    # Open the netCDF file
    ds = xr.open_dataset(input_nc)
    # print(ds.variables)
    print(year)

    if year>2023:
        variable_name = "rf"
        time_var="time"
    else:
        variable_name = "RAINFALL"
        time_var="TIME"

    da = ds[variable_name]

    # Loop over each day using the "TIME" coordinate
    for day in da.coords[time_var]:
        daily_data = da.sel({time_var: day})
        daily_data.rio.write_crs("EPSG:4326", inplace=True)

        day_str = str(day.values).split("T")[0]
        raw_tif = os.path.join(output_folder, f"imd_pcp_{day_str}_raw.tif")
        final_tif = os.path.join(output_folder, f"imd_pcp_{day_str}.tif")

        # Export raw GeoTIFF
        daily_data.rio.to_raster(raw_tif)


        # Use GDAL Warp to ensure north-up and EPSG:4326
        gdal.Warp(
            destNameOrDestDS=final_tif,
            srcDSOrSrcDSTab=raw_tif,
            dstSRS='EPSG:4326',
            resampleAlg='bilinear',
            format='GTiff'
        )

        os.remove(raw_tif)  # Clean up intermediate
        print(f"Saved raw: {final_tif}")

Aggregate Daily GeoTIFFs to Monthly

This script uses rasterio and numpy to aggregate daily rasters into monthly precipitation maps by summing values.

# pcp_imd_monthly.py
import os
import glob
import rasterio
import numpy as np

firstyear = 2023
lastyear = 2024

output_folder = "pcp_imd_monthly"
os.makedirs(output_folder, exist_ok=True)

input_folder = "pcp_imd_daily"

for year in range(firstyear, lastyear + 1):
    for month in range(1, 13):
        # Daily files are assumed to be named: imd_pcp_YYYY-MM-DD.tif
        pattern = os.path.join(input_folder, f"imd_pcp_{year}-{month:02d}-*.tif")
        daily_files = sorted(glob.glob(pattern))

        if not daily_files:
            print(f"No daily files found for {year}-{month:02d}")
            continue

        daily_arrays = []
        meta = None
        nodata_val = None

        # Loop over daily files and read data as float32.
        # Replace no-data values with np.nan for summing.
        for daily_file in daily_files:
            with rasterio.open(daily_file) as src:
                data = src.read(1).astype(np.float32)
                if meta is None:
                    meta = src.meta.copy()
                    nodata_val = src.nodata
                    if nodata_val is None:
                        # If nodata isn't defined, set a default value (e.g., -9999)
                        nodata_val = -9999
                        meta.update(nodata=nodata_val)
                # Replace nodata with np.nan so it won't affect the sum
                data[data == nodata_val] = np.nan
                daily_arrays.append(data)

        # Stack daily arrays along a new axis
        stack = np.stack(daily_arrays, axis=0)
        monthly_sum = np.nansum(stack, axis=0)
        # Identify pixels that are no-data in all daily files
        all_nan_mask = np.all(np.isnan(stack), axis=0)
        monthly_sum[all_nan_mask] = nodata_val

        # Update metadata: ensure data type and single band output
        meta.update(dtype=rasterio.float32, count=1)
        output_filename = os.path.join(output_folder, f"imd_pcp_m_{year}_{month:02d}.tif")

        with rasterio.open(output_filename, 'w', **meta) as dst:
            dst.write(monthly_sum, 1)

        print(f"Saved monthly raster: {output_filename}")

Aggregate Monthly GeoTIFFs to Annual

# pcp_imd_annual.py
import os
import numpy as np
import rasterio

firstyear = 2023
lastyear=2024

output_folder = "pcp_imd_annual"
os.makedirs(output_folder, exist_ok=True)

input_folder = "pcp_imd_monthly"

for year in range(firstyear, lastyear):
    # Annual period: June of current year to May of next year.
    monthly_files = []

    # June to December for the current year
    for month in range(6, 13):
        file_path = os.path.join(input_folder, f"imd_pcp_m_{year}_{month:02d}.tif")
        monthly_files.append(file_path)

    # January to May for the next year
    for month in range(1, 6):
        file_path = os.path.join(input_folder, f"imd_pcp_m_{year+1}_{month:02d}.tif")
        monthly_files.append(file_path)


    monthly_arrays = []
    meta = None
    nodata_val = None

    for monthly_file in monthly_files:
        with rasterio.open(monthly_file) as src:
            data = src.read(1).astype(np.float32)
            if meta is None:
                meta = src.meta.copy()
                nodata_val = src.nodata
                if nodata_val is None:
                    # If nodata is not defined, set a default value (e.g., -9999)
                    nodata_val = -9999
                    meta.update(nodata=nodata_val)
            # Replace nodata values with np.nan so they don't affect the sum
            data[data == nodata_val] = np.nan
            monthly_arrays.append(data)

    # Stack monthly arrays and compute the sum, ignoring NaNs
    stack = np.stack(monthly_arrays, axis=0)
    annual_sum = np.nansum(stack, axis=0)

    # For pixels that are nan in every month, set back to nodata
    all_nan_mask = np.all(np.isnan(stack), axis=0)
    annual_sum[all_nan_mask] = nodata_val

    meta.update(dtype=rasterio.float32, count=1)
    output_filename = os.path.join(output_folder, f"imd_pcp_a_{year}_{year+1}.tif")
    with rasterio.open(output_filename, 'w', **meta) as dst:
        dst.write(annual_sum, 1)

    print(f"Saved annual raster: {output_filename}")