Source code for chap_core.assessment.metrics.rmse

"""
Root Mean Squared Error (RMSE) metrics.
"""

import pandas as pd
from chap_core.assessment.flat_representations import DataDimension, FlatForecasts, FlatObserved
from chap_core.assessment.metrics.base import MetricBase, MetricSpec



[docs]
class RMSE(MetricBase):
    """
    Root Mean Squared Error reported once per location.

    All time periods and horizons belonging to a location are folded into a
    single RMSE value, so the output has exactly one row per location.
    """

    spec = MetricSpec(
        output_dimensions=(DataDimension.location,),
        metric_name="RMSE",
    )

    def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
        """
        Compute one RMSE value per location.

        Parameters
        ----------
        observations
            Table providing at least the columns ``location``, ``time_period``
            and ``disease_cases``.
        forecasts
            Table providing at least the columns ``location``, ``time_period``,
            ``sample`` and ``forecast``.

        Returns
        -------
        pd.DataFrame
            Two columns: ``location`` and ``metric`` (the RMSE).
        """
        join_keys = ["location", "time_period"]

        # Inner join: forecast rows with no matching observation (and vice
        # versa) are silently dropped and do not contribute to the metric.
        truth = observations[[*join_keys, "disease_cases"]]
        paired = forecasts.merge(truth, on=join_keys, how="inner")

        residual = paired["forecast"] - paired["disease_cases"]
        paired = paired.assign(squared_error=residual**2)

        # Stage 1: collapse every row that shares location/time_period/sample
        # into one mean squared error (this is where multiple rows for the
        # same sample, e.g. different horizons, get averaged).
        # NOTE(review): because stage 2 averages these means, groups with
        # fewer rows weigh the same as groups with more rows - confirm this
        # weighting is intended.
        sample_level = paired.groupby([*join_keys, "sample"], as_index=False)["squared_error"].mean()

        # Stage 2: mean of the stage-1 values within each location.
        location_level = sample_level.groupby("location", as_index=False)["squared_error"].mean()

        # The square root turns the mean squared error into RMSE.
        scored = location_level.assign(metric=location_level["squared_error"] ** 0.5)
        return scored[["location", "metric"]]





[docs]
class DetailedRMSE(MetricBase):
    """
    Root Mean Squared Error at the finest available resolution.

    No aggregation over locations, time periods or horizons takes place: the
    output holds one RMSE value for every location/time_period/horizon_distance
    combination present after matching forecasts to observations.
    """

    spec = MetricSpec(
        output_dimensions=(
            DataDimension.location,
            DataDimension.time_period,
            DataDimension.horizon_distance,
        ),
        metric_name="RMSE",
        description="Detailed RMSE",
    )

    def compute(self, observations: pd.DataFrame, forecasts: pd.DataFrame) -> pd.DataFrame:
        """
        Compute RMSE for each location/time_period/horizon_distance cell.

        Parameters
        ----------
        observations
            Table providing at least the columns ``location``, ``time_period``
            and ``disease_cases``.
        forecasts
            Table providing at least the columns ``location``, ``time_period``,
            ``horizon_distance`` and ``forecast``.

        Returns
        -------
        pd.DataFrame
            Columns ``location``, ``time_period``, ``horizon_distance`` and
            ``metric`` (the RMSE of the rows in that cell).
        """
        join_keys = ["location", "time_period"]
        cell_keys = [*join_keys, "horizon_distance"]

        # Inner join: only forecast rows that have a matching observation
        # for the same location and time period are scored.
        truth = observations[[*join_keys, "disease_cases"]]
        paired = forecasts.merge(truth, on=join_keys, how="inner")

        residual = paired["forecast"] - paired["disease_cases"]
        paired = paired.assign(squared_error=residual**2)

        # Every row sharing a cell (presumably the individual forecast
        # samples) is averaged into a single mean squared error.
        cell_mse = paired.groupby(cell_keys, as_index=False)["squared_error"].mean()

        # The square root turns the mean squared error into RMSE.
        scored = cell_mse.assign(metric=cell_mse["squared_error"] ** 0.5)
        return scored[[*cell_keys, "metric"]]