# Source code for chap_core.assessment.metrics.crps_norm

"""
Normalized Continuous Ranked Probability Score (CRPS) metrics.
"""

import pandas as pd
from chap_core.assessment.flat_representations import DataDimension, FlatForecasts, FlatObserved
from chap_core.assessment.metrics.base import MetricBase, MetricSpec
from chap_core.assessment.metrics.crps import DetailedCRPS



# [docs]
class DetailedCRPSNorm(MetricBase):
    """
    Detailed normalized Continuous Ranked Probability Score (CRPS) metric.

    Does not group: gives one normalized CRPS value per
    location/time_period/horizon_distance combination. Every CRPS value is
    divided by one shared factor, the range (max - min) of all observed
    ``disease_cases`` values, to make scores comparable across different scales.
    """

    spec = MetricSpec(
        output_dimensions=(DataDimension.location, DataDimension.time_period, DataDimension.horizon_distance),
        metric_name="CRPS Normalized",
        metric_id="detailed_crps_norm",
        description="Normalized CRPS per location, time period and horizon",
    )

    def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
        """
        Compute the detailed CRPS and scale it by the range of the observations.

        Args:
            observations: Observed values; the ``disease_cases`` column supplies
                the normalization range.
            forecasts: Forecasts, passed unchanged to ``DetailedCRPS.compute``.

        Returns:
            The frame produced by ``DetailedCRPS.compute`` with its ``metric``
            column divided by the observed range. When the range is zero (all
            observations identical) the CRPS values are returned unscaled. When
            there are no non-missing observations the range is NaN and so are
            the resulting scores.
        """
        # Start from the plain CRPS for each location/time_period/horizon combination.
        detailed_crps_results = DetailedCRPS().compute(observations, forecasts)

        # Use the pandas reductions rather than ``.values.min()/.max()``: the
        # NumPy versions propagate NaN, so a single missing observation would
        # turn every normalized score into NaN. pandas skips missing values.
        # NOTE(review): relies on ``observations["disease_cases"]`` being a
        # pandas Series (the previous code already read ``.values`` from it).
        observed_cases = observations["disease_cases"]
        obs_range = observed_cases.max() - observed_cases.min()

        # A zero range cannot be used as a divisor; in that case the regular
        # CRPS is reported as-is.
        if obs_range != 0:
            detailed_crps_results["metric"] = detailed_crps_results["metric"] / obs_range

        return detailed_crps_results





# [docs]
class CRPSNorm(MetricBase):
    """
    Normalized Continuous Ranked Probability Score (CRPS) metric, one value per location.

    Takes the detailed normalized CRPS values and averages them within each
    location, across all time periods and horizons.
    """

    spec = MetricSpec(
        output_dimensions=(DataDimension.location,),
        metric_name="CRPS Normalized",
        metric_id="crps_norm",
        description="Average normalized CRPS per location",
    )

    def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
        """
        Return the mean normalized CRPS for every location.

        Args:
            observations: Observed values, forwarded to ``DetailedCRPSNorm.compute``.
            forecasts: Forecasts, forwarded to ``DetailedCRPSNorm.compute``.

        Returns:
            A frame with a ``location`` column and a ``metric`` column holding
            the mean of the detailed normalized CRPS values for that location.
        """
        # Per-combination normalized scores come from the detailed metric.
        per_combination = DetailedCRPSNorm().compute(observations, forecasts)

        # Collapse to one row per location; as_index=False keeps "location" as a column.
        by_location = per_combination.groupby("location", as_index=False)
        return by_location["metric"].mean()