Source code for chap_core.data.open_dengue
from typing import Literal
from collections import Counter
import numpy as np
from dateutil.parser import parse
import pandas as pd
import pooch
from chap_core.time_period import Week
[docs]
class OpenDengueDataSet:
    """Accessor for the OpenDengue V1.2.2 temporal extract.

    Creating an instance hands ``data_path`` to ``pooch.retrieve`` and keeps
    the file name it returns. The zipped CSV is re-read from that file on
    every call to :meth:`subset`.
    """

    data_path = "https://github.com/OpenDengue/master-repo/raw/main/data/releases/V1.2.2/Temporal_extract_V1_2_2.zip"

    def __init__(self):
        # The second positional argument is pooch's ``known_hash``; None means
        # the file is not checked against a checksum.
        self._filename = pooch.retrieve(self.data_path, None)

    def subset(
        self,
        country_name: str,
        spatial_resolution: Literal["Admin1", "Admin2"] = "Admin1",
        temporal_resolution: str = "Week",
    ):
        """Return the raw rows for one country at the given resolutions.

        Parameters
        ----------
        country_name:
            Country to select; upper-cased before it is compared with the
            ``adm_0_name`` column.
        spatial_resolution:
            Value to match in the ``S_res`` column (capitalised first).
        temporal_resolution:
            Value to match in the ``T_res`` column (capitalised first).

        Returns
        -------
        pandas.DataFrame
            The matching rows with all original columns; empty when nothing
            matches.
        """
        df = pd.read_csv(self._filename, compression="zip")
        wanted = (
            (df["adm_0_name"] == country_name.upper())
            & (df["T_res"] == temporal_resolution.capitalize())
            & (df["S_res"] == spatial_resolution.capitalize())
        )
        return df[wanted]

    def as_dataset(
        self,
        country_name: str,
        spatial_resolution: Literal["Admin1", "Admin2"] = "Admin1",
        temporal_resolution: str = "Week",
    ):
        """Return a ``location`` / ``time_period`` / ``disease_cases`` table.

        Parameters
        ----------
        country_name:
            Country to select (see :meth:`subset`).
        spatial_resolution:
            ``"Admin1"`` uses ``adm_1_name`` as the location. Any other value
            is treated like ``"Admin2"`` and uses
            ``adm_1_name + "_" + adm_2_name``.
        temporal_resolution:
            ``"Week"`` or ``"Month"`` (case-insensitive).

        Returns
        -------
        pandas.DataFrame
            Three columns: ``location``, ``time_period`` and ``disease_cases``
            (renamed from ``dengue_total``). Monthly periods are ``YYYY-MM``
            strings; weekly periods are ``Week(start_date).id``.

        Raises
        ------
        ValueError
            If ``temporal_resolution`` is neither ``"Week"`` nor ``"Month"``.
        """
        # Normalise the same way subset() does, so that e.g. "week" or
        # "admin1" select the same branch here as the rows they filter there.
        temporal_resolution = temporal_resolution.capitalize()
        spatial_resolution = spatial_resolution.capitalize()
        if temporal_resolution not in ("Week", "Month"):
            raise ValueError(
                f"Unsupported temporal_resolution {temporal_resolution!r}; expected 'Week' or 'Month'"
            )

        frame = self.subset(country_name, spatial_resolution, temporal_resolution)
        # Parse every start date exactly once; both branches reuse the result.
        start_dates = [parse(date) for date in frame["calendar_start_date"]]

        if temporal_resolution == "Week":
            if start_dates:
                # Keep only the rows whose week starts on the most frequent
                # weekday (presumably to discard records that use a different
                # week convention -- confirm with the data owners).
                weekdays = [date.weekday() for date in start_dates]
                most_common_weekday = Counter(weekdays).most_common(1)[0][0]
                keep = np.array([day == most_common_weekday for day in weekdays])
                frame = frame[keep]
                start_dates = [date for date, kept in zip(start_dates, keep) if kept]
            periods = [Week(date).id for date in start_dates]
        else:
            periods = [date.strftime("%Y-%m") for date in start_dates]

        # assign() returns a new frame, so nothing is written into a filtered
        # slice of the CSV data (avoids pandas' SettingWithCopyWarning).
        frame = frame.assign(time_period=periods)

        if spatial_resolution == "Admin1":
            location_column = "adm_1_name"
        else:
            frame = frame.assign(location=frame["adm_1_name"] + "_" + frame["adm_2_name"])
            location_column = "location"

        s = frame.rename(columns={location_column: "location", "dengue_total": "disease_cases"})
        assert "disease_cases" in s.columns, f"No disease_cases column in {s.columns}"
        return s[["location", "time_period", "disease_cases"]]