Source code for chap_core.assessment.metrics.crps
"""
Continuous Ranked Probability Score (CRPS) metrics.
"""
import numpy as np
import pandas as pd
from chap_core.assessment.flat_representations import DataDimension, FlatForecasts, FlatObserved
from chap_core.assessment.metrics.base import MetricBase, MetricSpec
[docs]
class DetailedCRPS(MetricBase):
"""
Detailed Continuous Ranked Probability Score (CRPS) metric.
Does not group - gives one CRPS value per location/time_period/horizon_distance combination.
CRPS measures both calibration and sharpness of probabilistic forecasts.
"""
spec = MetricSpec(
output_dimensions=(DataDimension.location, DataDimension.time_period, DataDimension.horizon_distance),
metric_name="CRPS",
metric_id="detailed_crps",
description="CRPS per location, time period and horizon",
)
[docs]
def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
# Merge observations with forecasts on location and time_period
merged = forecasts.merge(
observations[["location", "time_period", "disease_cases"]], on=["location", "time_period"], how="inner"
)
# Group by location, time_period, and horizon_distance to compute CRPS
results = []
for (location, time_period, horizon), group in merged.groupby(["location", "time_period", "horizon_distance"]):
# Get all sample values for this combination
sample_values = group["forecast"].values
# Get the observation (should be the same for all samples)
obs_value = group["disease_cases"].iloc[0]
# Calculate CRPS using the formula from database.py
# CRPS = E[|X - obs|] - 0.5 * E[|X - X'|]
term1 = np.mean(np.abs(sample_values - obs_value))
term2 = 0.5 * np.mean(np.abs(sample_values[:, None] - sample_values[None, :]))
crps = float(term1 - term2)
results.append(
{"location": location, "time_period": time_period, "horizon_distance": horizon, "metric": crps}
)
return pd.DataFrame(results)
[docs]
class CRPSPerLocation(MetricBase):
"""
Continuous Ranked Probability Score (CRPS) metric aggregated by location.
Groups by location to give average CRPS per location across all time periods and horizons.
"""
spec = MetricSpec(
output_dimensions=(DataDimension.location,),
metric_name="CRPS",
metric_id="crps_per_location",
description="Average CRPS per location",
)
[docs]
def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
# First compute detailed CRPS
detailed_crps_metric = DetailedCRPS()
detailed_results = detailed_crps_metric.compute(observations, forecasts)
# Aggregate by location
location_crps = detailed_results.groupby("location", as_index=False)["metric"].mean()
return location_crps
[docs]
class CRPS(MetricBase):
"""
Continuous Ranked Probability Score (CRPS) metric for the entire dataset.
Gives one CRPS value across all locations, time periods and horizons.
"""
spec = MetricSpec(
output_dimensions=(), metric_name="CRPS", metric_id="crps", description="Overall CRPS across entire dataset"
)
[docs]
def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
# First compute CRPS per location
crps_per_location_metric = CRPSPerLocation()
location_results = crps_per_location_metric.compute(observations, forecasts)
# Aggregate across all locations to get overall CRPS
overall_crps = location_results["metric"].mean()
return pd.DataFrame({"metric": [overall_crps]})