Skip to content

Precipitation (IMD)

The India Meteorological Department (IMD) provides a high-resolution (0.25X0.25 degree) daily gridded rainfall dataset for India.


Dataset Overview


Download Annual NetCDF from IMD

# pcp_imd_download.py
import requests
import os
import re
from tqdm import tqdm

# Create a folder to store downloaded files
download_folder = "imd_netcdf_files"
os.makedirs(download_folder, exist_ok=True)

firstyear = 2018
lastyear = 2024



# Define the URL
url = "https://www.imdpune.gov.in/cmpg/Griddata/RF25.php"

# Loop over the years for which you wish to download the netCDF files
for year in range(firstyear, lastyear + 1):  # Adjust the range as needed
    payload = {"RF25": str(year)}
    print(f"Downloading netCDF file for year {year} ...")

    try:
        # Post the request with the payload
        response = requests.post(url, data=payload, stream=True)
        response.raise_for_status()  # Raise an error for bad responses

        # Determine filename (using a default if not provided in headers)
        filename = f"RF25_{year}.nc"

        # Get total file size from headers (if available) for the progress bar
        total_size = int(response.headers.get('content-length', 0))
        progress_bar = tqdm(total=total_size, unit='B', unit_scale=True, desc=filename)

        output_path = os.path.join(download_folder, filename)
        with open(output_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
                    progress_bar.update(len(chunk))
        progress_bar.close()

        # Check if the download completed correctly
        if total_size != 0 and progress_bar.n != total_size:
            print(f"WARNING: Download size mismatch for {filename}")
        else:
            print(f"Downloaded {filename} successfully.")

    except requests.RequestException as e:
        print(f"Failed to download netCDF file for year {year}: {e}")

Convert NetCDF to Daily GeoTIFFs

The following Python script converts daily precipitation values from IMD NetCDF files to GeoTIFF format using xarray and rioxarray.

# pcp_imd_daily.py
# pcp_imd_daily.py
import xarray as xr
import rioxarray
import os
from osgeo import gdal

firstyear = 2023
lastyear = 2024

input_folder = "imd_netcdf_files"

output_folder = "pcp_imd_daily"
os.makedirs(output_folder, exist_ok=True)


for year in range(firstyear, lastyear + 1):
    # Construct the full path to the netCDF file using f-string and os.path.join
    input_nc = os.path.join(input_folder, f"RF25_{year}.nc")

    # Open the netCDF file
    ds = xr.open_dataset(input_nc)
    # print(ds.variables)
    print(year)

    if year>2023:
        variable_name = "rf"
        time_var="time"
    else:
        variable_name = "RAINFALL"
        time_var="TIME"

    da = ds[variable_name]

    # Loop over each day using the "TIME" coordinate
    for day in da.coords[time_var]:
        daily_data = da.sel({time_var: day})
        daily_data.rio.write_crs("EPSG:4326", inplace=True)

        day_str = str(day.values).split("T")[0]
        raw_tif = os.path.join(output_folder, f"imd_pcp_{day_str}_raw.tif")
        final_tif = os.path.join(output_folder, f"imd_pcp_{day_str}.tif")

        # Export raw GeoTIFF
        daily_data.rio.to_raster(raw_tif)


        # Use GDAL Warp to ensure north-up and EPSG:4326
        gdal.Warp(
            destNameOrDestDS=final_tif,
            srcDSOrSrcDSTab=raw_tif,
            dstSRS='EPSG:4326',
            resampleAlg='bilinear',
            format='GTiff'
        )

        os.remove(raw_tif)  # Clean up intermediate
        print(f"Saved raw: {final_tif}")

Aggregate Daily GeoTIFFs to Monthly

This script aggregate daily rasters into monthly precipitation maps by summing values.

# pcp_imd_monthly.py
import os
import glob
import rasterio
import numpy as np

firstyear = 2023
lastyear = 2024

output_folder = "pcp_imd_monthly"
os.makedirs(output_folder, exist_ok=True)

input_folder = "pcp_imd_daily"

for year in range(firstyear, lastyear + 1):
    for month in range(1, 13):
        # Daily files are assumed to be named: imd_pcp_YYYY-MM-DD.tif
        pattern = os.path.join(input_folder, f"imd_pcp_{year}-{month:02d}-*.tif")
        daily_files = sorted(glob.glob(pattern))

        if not daily_files:
            print(f"No daily files found for {year}-{month:02d}")
            continue

        daily_arrays = []
        meta = None
        nodata_val = None

        # Loop over daily files and read data as float32.
        # Replace no-data values with np.nan for summing.
        for daily_file in daily_files:
            with rasterio.open(daily_file) as src:
                data = src.read(1).astype(np.float32)
                if meta is None:
                    meta = src.meta.copy()
                    nodata_val = src.nodata
                    if nodata_val is None:
                        # If nodata isn't defined, set a default value (e.g., -9999)
                        nodata_val = -9999
                        meta.update(nodata=nodata_val)
                # Replace nodata with np.nan so it won't affect the sum
                data[data == nodata_val] = np.nan
                daily_arrays.append(data)

        # Stack daily arrays along a new axis
        stack = np.stack(daily_arrays, axis=0)
        monthly_sum = np.nansum(stack, axis=0)
        # Identify pixels that are no-data in all daily files
        all_nan_mask = np.all(np.isnan(stack), axis=0)
        monthly_sum[all_nan_mask] = nodata_val

        # Update metadata: ensure data type and single band output
        meta.update(dtype=rasterio.float32, count=1)
        output_filename = os.path.join(output_folder, f"imd_pcp_m_{year}_{month:02d}.tif")

        with rasterio.open(output_filename, 'w', **meta) as dst:
            dst.write(monthly_sum, 1)

        print(f"Saved monthly raster: {output_filename}")

Aggregate Monthly GeoTIFFs to Annual

# pcp_imd_annual.py
import os
import numpy as np
import rasterio

firstyear = 2023
lastyear=2024

output_folder = "pcp_imd_annual"
os.makedirs(output_folder, exist_ok=True)

input_folder = "pcp_imd_monthly"

for year in range(firstyear, lastyear):
    # Annual period: June of current year to May of next year.
    monthly_files = []

    # June to December for the current year
    for month in range(6, 13):
        file_path = os.path.join(input_folder, f"imd_pcp_m_{year}_{month:02d}.tif")
        monthly_files.append(file_path)

    # January to May for the next year
    for month in range(1, 6):
        file_path = os.path.join(input_folder, f"imd_pcp_m_{year+1}_{month:02d}.tif")
        monthly_files.append(file_path)


    monthly_arrays = []
    meta = None
    nodata_val = None

    for monthly_file in monthly_files:
        with rasterio.open(monthly_file) as src:
            data = src.read(1).astype(np.float32)
            if meta is None:
                meta = src.meta.copy()
                nodata_val = src.nodata
                if nodata_val is None:
                    # If nodata is not defined, set a default value (e.g., -9999)
                    nodata_val = -9999
                    meta.update(nodata=nodata_val)
            # Replace nodata values with np.nan so they don't affect the sum
            data[data == nodata_val] = np.nan
            monthly_arrays.append(data)

    # Stack monthly arrays and compute the sum, ignoring NaNs
    stack = np.stack(monthly_arrays, axis=0)
    annual_sum = np.nansum(stack, axis=0)

    # For pixels that are nan in every month, set back to nodata
    all_nan_mask = np.all(np.isnan(stack), axis=0)
    annual_sum[all_nan_mask] = nodata_val

    meta.update(dtype=rasterio.float32, count=1)
    output_filename = os.path.join(output_folder, f"imd_pcp_a_{year}_{year+1}.tif")
    with rasterio.open(output_filename, 'w', **meta) as dst:
        dst.write(annual_sum, 1)

    print(f"Saved annual raster: {output_filename}")