Skip to content

Zonal Statistics for Raster Time Series

This script computes zonal statistics (mean values) of a time-series of raster files (e.g., WaPOR AETI rasters) over vector polygon boundaries (e.g. districts, basins). The result is a CSV table with one row per polygon feature (e.g. districts, basins) and one column per raster file.


import geopandas as gpd
import rasterio
from rasterstats import zonal_stats
import pandas as pd
import os


# Path to the folder containing raster files
raster_files_folder = '/Users/amanchaudhary/Documents/Resources/World_Bank/Zambia/WaPOR_ETb_A'
geojson_boundary = "/Users/amanchaudhary/Documents/Resources/World_Bank/Zambia/Shapefile/Zambia_L2.geojson" 
output_csv= "/Users/amanchaudhary/Documents/Resources/World_Bank/Zambia/ZonalStats/WaPOR_ETb_A.csv"   


# Load shapefile
zones_features = gpd.read_file(geojson_boundary)

# Dictionary to store zonal statistics for each month
stats_by_file = {}

# Iterate over raster files in the folder
for filename in os.listdir(raster_files_folder):
    if filename.endswith('.tif'):
        print(filename)

        raster_file = os.path.join(raster_files_folder, filename)
        with rasterio.open(raster_file) as src:
            affine = src.transform
            stats = zonal_stats(zones_features.geometry, src.read(1), affine=affine, stats=['mean'], nodata=src.nodata)
            stats_by_file[filename] = [
                round(stat["mean"], 2) if stat["mean"] is not None else None
                for stat in stats
            ]


# Build DataFrame with zonal stats
df_stats = pd.DataFrame(stats_by_file)

# Merge all attributes from the GeoJSON (except geometry)
zones_attr = zones_features.drop(columns="geometry")
df = pd.concat([zones_attr, df_stats], axis=1)

# Ensure clean order: attributes first, then raster columns sorted
attr_cols = [c for c in zones_attr.columns]  # all attributes (index + properties)
raster_cols = sorted(df_stats.columns)
df = df[attr_cols + raster_cols]

# Save CSV (no geometry included)
df.to_csv(output_csv, index=False)