Source code for chap_core.assessment.metrics.rmse

"""
Root Mean Squared Error (RMSE) metrics.
"""

import pandas as pd
from chap_core.assessment.flat_representations import DataDimension, FlatForecasts, FlatObserved
from chap_core.assessment.metrics.base import MetricBase, MetricSpec


class RMSE(MetricBase):
    """
    Root Mean Squared Error reported once per location.

    Forecast rows are paired with the observed ``disease_cases`` for the same
    location and time period, squared errors are averaged in two stages, and
    the square root of the resulting mean is returned for each location.
    """

    spec = MetricSpec(output_dimensions=(DataDimension.location,), metric_name="RMSE")

    def compute(self, observations: FlatObserved, forecasts: FlatForecasts) -> pd.DataFrame:
        """
        Compute one RMSE value per location.

        Args:
            observations: Table providing at least the ``location``,
                ``time_period`` and ``disease_cases`` columns.
            forecasts: Table providing at least the ``location``,
                ``time_period``, ``sample`` and ``forecast`` columns.

        Returns:
            A DataFrame with the columns ``location`` and ``metric``.
        """
        # Inner join: forecast rows without a matching observation are dropped.
        truth = observations[["location", "time_period", "disease_cases"]]
        paired = forecasts.merge(truth, on=["location", "time_period"], how="inner")

        paired["squared_error"] = (paired["forecast"] - paired["disease_cases"]) ** 2

        # Stage 1: mean squared error within each location/time_period/sample
        # group, i.e. averaged over whatever rows remain in the group
        # (presumably the different forecast horizons).
        inner_keys = ["location", "time_period", "sample"]
        group_mse = paired.groupby(inner_keys, as_index=False)["squared_error"].mean()

        # Stage 2: mean of the stage-1 means for each location.
        # NOTE(review): this is a mean of group means, so it equals the pooled
        # mean over all rows only when every stage-1 group has the same size.
        per_location = group_mse.groupby("location", as_index=False)["squared_error"].mean()

        # The square root turns the averaged squared error into RMSE.
        per_location = per_location.assign(metric=per_location["squared_error"] ** 0.5)
        return per_location[["location", "metric"]]
class DetailedRMSE(MetricBase):
    """
    Root Mean Squared Error at the finest available resolution.

    No aggregation over locations, time periods or horizons takes place: one
    RMSE value is produced for every location/time_period/horizon_distance
    combination present in the merged data.
    """

    spec = MetricSpec(
        output_dimensions=(DataDimension.location, DataDimension.time_period, DataDimension.horizon_distance),
        metric_name="RMSE",
        description="Detailed RMSE",
    )

    def compute(self, observations: pd.DataFrame, forecasts: pd.DataFrame) -> pd.DataFrame:
        """
        Compute one RMSE value per location, time period and horizon distance.

        Args:
            observations: Table providing at least the ``location``,
                ``time_period`` and ``disease_cases`` columns.
            forecasts: Table providing at least the ``location``,
                ``time_period``, ``horizon_distance`` and ``forecast`` columns.

        Returns:
            A DataFrame with the columns ``location``, ``time_period``,
            ``horizon_distance`` and ``metric``.
        """
        group_keys = ["location", "time_period", "horizon_distance"]

        # Inner join: forecast rows without a matching observation are dropped.
        truth = observations[["location", "time_period", "disease_cases"]]
        paired = forecasts.merge(truth, on=["location", "time_period"], how="inner")

        paired["squared_error"] = (paired["forecast"] - paired["disease_cases"]) ** 2

        # Mean squared error over all rows sharing the same key combination
        # (presumably the individual forecast samples).
        cell_mse = paired.groupby(group_keys, as_index=False)["squared_error"].mean()

        # The square root turns the averaged squared error into RMSE.
        cell_mse["metric"] = cell_mse["squared_error"] ** 0.5
        return cell_mse[[*group_keys, "metric"]]