Source code for chap_core.assessment.metrics.crps_norm
"""
Normalized Continuous Ranked Probability Score (CRPS) metrics.
"""
import pandas as pd
from chap_core.assessment.flat_representations import DataDimension, FlatForecasts, FlatObserved
from chap_core.assessment.metrics.base import MetricBase, MetricSpec
from chap_core.assessment.metrics.crps import DetailedCRPS
[docs]
class DetailedCRPSNorm(MetricBase):
    """
    Detailed Normalized Continuous Ranked Probability Score (CRPS) metric.

    Does not group - gives one normalized CRPS value per
    location/time_period/horizon_distance combination.
    Each CRPS value is divided by the range (max - min) of all observed
    ``disease_cases`` values to make it comparable across different scales.
    """

    spec = MetricSpec(
        output_dimensions=(DataDimension.location, DataDimension.time_period, DataDimension.horizon_distance),
        metric_name="CRPS Normalized",
        metric_id="detailed_crps_norm",
        description="Normalized CRPS per location, time period and horizon",
    )

    def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
        """
        Compute the detailed CRPS and scale it by the observed value range.

        Args:
            observations: Observed data; its ``disease_cases`` column supplies
                the normalization range.
            forecasts: Forecasts, passed unchanged to ``DetailedCRPS.compute``.

        Returns:
            The frame produced by ``DetailedCRPS.compute`` with its ``metric``
            column divided by ``max - min`` of the observed ``disease_cases``.
            When that range is zero the CRPS values are returned unnormalized.
        """
        # Regular CRPS for each location/time_period/horizon combination.
        detailed_crps_results = DetailedCRPS().compute(observations, forecasts)

        # Use the pandas reductions rather than the raw NumPy array: they skip
        # missing observations, so a single NaN in ``disease_cases`` no longer
        # turns the range (and with it every normalized score) into NaN.
        # NOTE(review): assumes observations["disease_cases"] is a pandas Series - confirm.
        observed_cases = observations["disease_cases"]
        obs_range = observed_cases.max() - observed_cases.min()

        # A zero range (all observations identical) would divide by zero, so the
        # CRPS is left unnormalized in that case. A NaN range (no valid
        # observation at all) still falls through to the division, as before.
        if obs_range != 0:
            detailed_crps_results["metric"] = detailed_crps_results["metric"] / obs_range

        return detailed_crps_results
[docs]
class CRPSNorm(MetricBase):
    """
    Location-level normalized Continuous Ranked Probability Score (CRPS).

    Averages the detailed normalized CRPS over every time period and horizon,
    yielding a single mean value per location.
    """

    spec = MetricSpec(
        output_dimensions=(DataDimension.location,),
        metric_name="CRPS Normalized",
        metric_id="crps_norm",
        description="Average normalized CRPS per location",
    )

    def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
        """
        Return the mean normalized CRPS for each location.

        Args:
            observations: Observed data, forwarded to ``DetailedCRPSNorm.compute``.
            forecasts: Forecasts, forwarded to ``DetailedCRPSNorm.compute``.

        Returns:
            A frame with a ``location`` column and a ``metric`` column holding
            the mean of the detailed normalized CRPS values for that location.
        """
        # One normalized CRPS value per location/time_period/horizon combination.
        per_combination = DetailedCRPSNorm().compute(observations, forecasts)

        # Collapse the time period and horizon dimensions by averaging within
        # each location; as_index=False keeps "location" as a regular column.
        grouped_metric = per_combination.groupby("location", as_index=False)["metric"]
        return grouped_metric.mean()