Source code for chap_core.predictor.naive_predictor
import dataclasses
import numpy as np
from sklearn import linear_model
from chap_core.spatio_temporal_data.temporal_dataclass import (
DataSet,
TemporalDataclass,
)
from chap_core.datatypes import HealthData, ClimateHealthTimeSeries, ClimateData
[docs]
class NaivePredictor:
"""This should be a linear regression of prev cases and season"""
def __init__(self, lead_time=1):
self._average_cases = None
[docs]
def train(self, data: ClimateHealthTimeSeries):
self._average_cases = data.disease_cases.mean()
[docs]
def predict(self, future_climate_data: ClimateData) -> HealthData:
return HealthData(
future_climate_data.time_period,
np.full(len(future_climate_data), self._average_cases),
)
[docs]
class MultiRegionNaivePredictor:
"""TODO: This should be a linear regression of prev cases and season for each location."""
def __init__(self, *args, **kwargs):
self._training_stop = None
self._average_cases = None
def _get_mean(self, data):
y = data.disease_cases
y = y[~np.isnan(y)]
return y.mean()
# return data.disease_cases.mean()
[docs]
def train(self, data: DataSet[ClimateHealthTimeSeries]):
self._average_cases = {location: self._get_mean(data) for location, data in data.items()}
# self._buffer = next(iter(data.values())).time_period[-1]
[docs]
def predict(self, future_weather: DataSet[ClimateData]) -> HealthData:
prediction_dict = {
location: HealthData(entry.time_period[:1], np.full(1, self._average_cases[location]))
for location, entry in future_weather.items()
}
return DataSet(prediction_dict)
[docs]
class MultiRegionPoissonModel:
def __init__(self, *args, **kwargs):
self._training_stop = None
self._models = {}
self._saved_state = {}
def _create_feature_matrix(self, data: ClimateHealthTimeSeries):
lagged_values = data.disease_cases[:-1, None]
month = np.array([period.month for period in data.time_period])
season = month[1:, None] == np.arange(1, 13)
return np.hstack([lagged_values, season])
[docs]
def train(self, data: DataSet[ClimateHealthTimeSeries]):
for location, location_data in data.items():
X = self._create_feature_matrix(location_data)
y = location_data.disease_cases[1:]
mask = ~np.isnan(X).any(axis=1) & ~np.isnan(y)
assert mask[-1]
X = X[mask]
y = y[mask]
model = linear_model.PoissonRegressor()
model.fit(X, y)
self._models[location] = model
saved_data = location_data[-1:]
assert not np.any(np.isnan(saved_data.disease_cases)), f"{saved_data.disease_cases}"
self._saved_state[location] = TemporalDataclass(saved_data)
[docs]
def predict(self, data: DataSet[ClimateData]) -> DataSet[HealthData]:
prediction_dict = {}
for location, location_data in data.items():
state_values = self._saved_state[location]
# state_values = TemporalDataclass(location_data.data().__class__(**{field.name: getattr(state_values.data(), field.name) for field in dataclasses.fields(location_data.data())}))
location_data = TemporalDataclass(
state_values.data().__class__(
**{
field.name: getattr(location_data.data(), field.name)
for field in dataclasses.fields(location_data)
}
| {"disease_cases": np.full(len(location_data), 0)}
)
)
# location_data.data().disease_cases = np.full(len(location_data.data()), np.nan)
X = self._create_feature_matrix(state_values.join(location_data))
prediction = self._models[location].predict(X[-1:])
prediction_dict[location] = HealthData(location_data.time_period[:1], np.atleast_1d(prediction))
return DataSet(prediction_dict)
[docs]
class NaiveForecastSampler:
def __init__(self):
self._case_average = None
self._case_std = None
[docs]
def train(self, time_series: ClimateHealthTimeSeries):
self._case_average = time_series.disease_cases.mean()
self._case_std = time_series.disease_cases.std()
[docs]
def sample(self, weather_data: ClimateData, n_samples: int = 1) -> HealthData:
return HealthData(
weather_data.time_period,
np.random.normal(self._case_average, self._case_std, n_samples),
)