Source code for chap_core.simulation.naive_simulator
"""
Simulation code that can be useful for creating tests. Currently in use.
todo: Can maybe be moved to tests/
"""
import abc
from typing import Any
import pydantic
import numpy as np
from numpy.random import normal, poisson
from chap_core.database.dataset_tables import DataSet, Observation
from chap_core.database.tables import BackTest, BackTestForecast

class SimulationParams(pydantic.BaseModel):
    loc: float = 5
    scale: float = 2

class DatasetDimensions(pydantic.BaseModel):
    locations: list[str]
    time_periods: list[str]
    target: str = "disease_cases"
    features: list[str] = []

class Simulator(abc.ABC):
    def __init__(self, params: SimulationParams):
        self._params = params

    @abc.abstractmethod
    def simulate(self, data_dims: DatasetDimensions) -> DataSet: ...

class AdditiveSimulator(Simulator):
    def __init__(self, params: SimulationParams = SimulationParams()):
        super().__init__(params)

    def generate_raw(self, data_dims: DatasetDimensions) -> np.ndarray:
        # Random per-location offsets plus a sinusoidal time pattern with a 12-period cycle.
        location_offsets = normal(0, 1, len(data_dims.locations))
        x = np.arange(len(data_dims.time_periods)) / 12 * 2 * np.pi
        time_pattern = np.sin(x)
        mu = location_offsets[:, None] + time_pattern[None, :]
        with_noise = normal(mu, 0.1)
        # Rescale to the configured loc/scale; result has shape (n_locations, n_time_periods).
        values = with_noise * self._params.scale + self._params.loc
        return values

    def simulate(self, data_dims: DatasetDimensions) -> DataSet:
        feature_names = data_dims.features + [data_dims.target]
        observations = []
        for feature_name in feature_names:
            observations.extend(self.simulate_observations(data_dims, feature_name))
        return DataSet(
            name="Simulated DataSet", covariates=data_dims.features + [data_dims.target], observations=observations
        )

    def simulate_observations(self, data_dims: DatasetDimensions, feature_name: str) -> list[Observation]:
        values = self.generate_raw(data_dims)
        # Exponentiate so the simulated values are non-negative before casting to integer counts.
        values = np.exp(values).astype(int)
        observations = [
            Observation(
                period=data_dims.time_periods[time_idx],
                org_unit=data_dims.locations[loc_idx],
                feature_name=feature_name,
                value=int(values[loc_idx, time_idx]),
            )
            for loc_idx in range(len(data_dims.locations))
            for time_idx in range(len(data_dims.time_periods))
        ]
        return observations
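
# Example usage (illustrative sketch, not part of the original module): a small
# DatasetDimensions is enough to obtain a simulated DataSet. The location and
# period strings below are made-up placeholders.
#
#     dims = DatasetDimensions(
#         locations=["region_a", "region_b"],
#         time_periods=["2023-01", "2023-02", "2023-03"],
#     )
#     simulated = AdditiveSimulator().simulate(dims)
#     # One observation per (location, period) pair for the target feature.
#     assert len(simulated.observations) == len(dims.locations) * len(dims.time_periods)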

class ForecastParams(pydantic.BaseModel):
    prediction_length: int = 3
    n_samples: int = 100
    n_splits: int = 2

class BacktestSimulator:
    def __init__(self, params: ForecastParams = ForecastParams()):
        self._params = params

    def simulate(self, dataset: DataSet, dataset_dims: DatasetDimensions) -> BackTest:
        # The last (prediction_length + n_splits - 1) periods cover all forecast windows.
        periods = dataset_dims.time_periods[-(self._params.prediction_length + self._params.n_splits - 1) :]
        split_periods = periods[: self._params.n_splits]
        backtest = BackTest(
            dataset=dataset, model_id="Naive Forecast", org_units=dataset_dims.locations, split_periods=split_periods
        )
        forecasts = []
        for i in range(self._params.n_splits):
            forecasts.extend(self.simulate_split(dataset, periods[i : i + self._params.prediction_length]))
        backtest.forecasts = forecasts
        return backtest

    def simulate_split(self, dataset: DataSet, periods: list[str]) -> list[Any]:
        forecasts = []
        split_period = periods[0]
        for observation in dataset.observations:
            if observation.period not in periods:
                continue
            # Draw noisy rates around the observed value and sample Poisson counts from them.
            rate = normal(observation.value, observation.value, size=self._params.n_samples)
            rate = np.maximum(rate, 0)
            samples = poisson(rate).astype(float)
            forecasts.append(
                BackTestForecast(
                    values=samples.tolist(),
                    last_seen_period=split_period,
                    last_train_period=split_period,
                    **observation.model_dump(),
                )
            )
        return forecasts
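
# Minimal end-to-end sketch (an assumption about intended use, not original code):
# simulate a dataset, then run the naive backtest over it. Guarded so that importing
# the module stays side-effect free; the dimension values are placeholders.
if __name__ == "__main__":
    dims = DatasetDimensions(
        locations=["region_a", "region_b"],
        time_periods=[f"2023-{month:02d}" for month in range(1, 13)],
    )
    dataset = AdditiveSimulator().simulate(dims)
    backtest = BacktestSimulator(ForecastParams(prediction_length=3, n_splits=2)).simulate(dataset, dims)
    print(f"{len(dataset.observations)} observations, {len(backtest.forecasts)} forecasts")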