Source code for chap_core.assessment.metrics.percentile_coverage

"""
Percentile coverage metrics for evaluating forecast calibration.
"""

import numpy as np
import pandas as pd
from chap_core.assessment.flat_representations import DataDimension, FlatForecasts, FlatObserved
from chap_core.assessment.metrics.base import MetricBase, MetricSpec


[docs] class IsWithin10th90thDetailed(MetricBase): """ Detailed metric checking if observation falls within 10th-90th percentile of forecast samples. Does not group - gives one binary value (0 or 1) per location/time_period/horizon_distance combination. Returns 1 if observation is within the 10th-90th percentile range, 0 otherwise. """ spec = MetricSpec( output_dimensions=(DataDimension.location, DataDimension.time_period, DataDimension.horizon_distance), metric_name="Within 10-90 Percentile", metric_id="is_within_10th_90th_detailed", description="Binary indicator if observation is within 10th-90th percentile per location, time period and horizon", )
[docs] def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame: # Merge observations with forecasts on location and time_period merged = forecasts.merge( observations[["location", "time_period", "disease_cases"]], on=["location", "time_period"], how="inner" ) # Group by location, time_period, and horizon_distance to compute percentile coverage results = [] for (location, time_period, horizon), group in merged.groupby(["location", "time_period", "horizon_distance"]): # Get all sample values for this combination sample_values = group["forecast"].values # Get the observation (should be the same for all samples) obs_value = group["disease_cases"].iloc[0] # Calculate 10th and 90th percentiles of the samples low, high = np.percentile(sample_values, [10, 90]) # Check if observation falls within this range is_within_range = 1.0 if (low <= obs_value <= high) else 0.0 results.append( { "location": location, "time_period": time_period, "horizon_distance": horizon, "metric": is_within_range, } ) return pd.DataFrame(results)
[docs] class IsWithin25th75thDetailed(MetricBase): """ Detailed metric checking if observation falls within 25th-75th percentile of forecast samples. Does not group - gives one binary value (0 or 1) per location/time_period/horizon_distance combination. Returns 1 if observation is within the 25th-75th percentile range, 0 otherwise. """ spec = MetricSpec( output_dimensions=(DataDimension.location, DataDimension.time_period, DataDimension.horizon_distance), metric_name="Within 25-75 Percentile", metric_id="is_within_25th_75th_detailed", description="Binary indicator if observation is within 25th-75th percentile", )
[docs] def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame: # Merge observations with forecasts on location and time_period merged = forecasts.merge( observations[["location", "time_period", "disease_cases"]], on=["location", "time_period"], how="inner" ) # Group by location, time_period, and horizon_distance to compute percentile coverage results = [] for (location, time_period, horizon), group in merged.groupby(["location", "time_period", "horizon_distance"]): # Get all sample values for this combination sample_values = group["forecast"].values # Get the observation (should be the same for all samples) obs_value = group["disease_cases"].iloc[0] # Calculate 25th and 75th percentiles of the samples low, high = np.percentile(sample_values, [25, 75]) # Check if observation falls within this range is_within_range = 1.0 if (low <= obs_value <= high) else 0.0 results.append( { "location": location, "time_period": time_period, "horizon_distance": horizon, "metric": is_within_range, } ) return pd.DataFrame(results)
[docs] class RatioWithin10th90thPerLocation(MetricBase): """ Ratio of observations within 10th-90th percentile, aggregated by location. Groups by location to give the proportion of forecasts where observation fell within range. """ spec = MetricSpec( output_dimensions=(DataDimension.location,), metric_name="Ratio Within 10-90 Percentile", metric_id="ratio_within_10th_90th_per_location", description="Ratio of observations within 10th-90th percentile per location", )
[docs] def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame: # First compute detailed metric detailed_metric = IsWithin10th90thDetailed() detailed_results = detailed_metric.compute(observations, forecasts) # Aggregate by location (mean of binary values gives ratio) location_ratios = detailed_results.groupby("location", as_index=False)["metric"].mean() return location_ratios
[docs] class RatioWithin10th90th(MetricBase): """ Overall ratio of observations within 10th-90th percentile for entire dataset. Gives one ratio value across all locations, time periods and horizons. """ spec = MetricSpec( output_dimensions=(), metric_name="Ratio Within 10-90 Percentile", metric_id="ratio_within_10th_90th", description="Overall ratio of observations within 10th-90th percentile", )
[docs] def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame: # First compute ratio per location ratio_per_location_metric = RatioWithin10th90thPerLocation() location_results = ratio_per_location_metric.compute(observations, forecasts) # Aggregate across all locations to get overall ratio overall_ratio = location_results["metric"].mean() return pd.DataFrame({"metric": [overall_ratio]})
[docs] class RatioWithin25th75thPerLocation(MetricBase): """ Ratio of observations within 25th-75th percentile, aggregated by location. Groups by location to give the proportion of forecasts where observation fell within range. """ spec = MetricSpec( output_dimensions=(DataDimension.location,), metric_name="Ratio Within 25-75 Percentile", metric_id="ratio_within_25th_75th_per_location", description="Ratio of observations within 25th-75th percentile per location", )
[docs] def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame: # First compute detailed metric detailed_metric = IsWithin25th75thDetailed() detailed_results = detailed_metric.compute(observations, forecasts) # Aggregate by location (mean of binary values gives ratio) location_ratios = detailed_results.groupby("location", as_index=False)["metric"].mean() return location_ratios
[docs] class RatioWithin25th75th(MetricBase): """ Overall ratio of observations within 25th-75th percentile for entire dataset. Gives one ratio value across all locations, time periods and horizons. """ spec = MetricSpec( output_dimensions=(), metric_name="Ratio Within 25-75 Percentile", metric_id="ratio_within_25th_75th", description="Overall ratio of observations within 25th-75th percentile", )
[docs] def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame: # First compute ratio per location ratio_per_location_metric = RatioWithin25th75thPerLocation() location_results = ratio_per_location_metric.compute(observations, forecasts) # Aggregate across all locations to get overall ratio overall_ratio = location_results["metric"].mean() return pd.DataFrame({"metric": [overall_ratio]})