# Source code for chap_core.adaptors.command_line_interface

import yaml
from cyclopts import App

# from chap_core.models.model_template_interface import ModelConfiguration
from chap_core.database.model_templates_and_config_tables import ModelConfiguration
from chap_core.datatypes import remove_field, create_tsdataclass
from chap_core.external.model_configuration import RunnerConfig, EntryPointConfig, CommandConfig
from chap_core.model_spec import get_dataclass
from chap_core.models.model_template_interface import InternalModelTemplate
from chap_core.spatio_temporal_data.temporal_dataclass import DataSet
import logging

logger = logging.getLogger(__name__)


# TODO write an app for a model template

# [docs]
def generate_app(estimator):
    """
    Build a command line app with ``train`` and ``predict`` commands for an estimator

    Parameters
    ----------
    estimator
        Object providing ``train(dataset)``, which returns a predictor with a
        ``save(path)`` method, and ``load_predictor(path)``, which returns a
        predictor with a ``predict(historic_data, future_data)`` method.
        The dataclass used to parse the csv files is derived from it with
        ``get_dataclass``.

    Returns
    -------
    App
        The app with the two commands registered
    """
    app = App()
    dc = get_dataclass(estimator)

    @app.command()
    def train(training_data_filename: str, model_path: str):
        """
        Train a model using historic data

        Parameters
        ----------
        training_data_filename: str
            The path to the training data file
        model_path: str
            The path to save the trained model
        """
        logger.info("Loading data from %s as %s", training_data_filename, dc)
        dataset = DataSet.from_csv(training_data_filename, dc)
        predictor = estimator.train(dataset)
        predictor.save(model_path)

    @app.command()
    def predict(model_filename: str, historic_data_filename: str, future_data_filename: str, output_filename: str):
        """
        Predict using a trained model

        Parameters
        ----------
        model_filename: str
            The path to the model file trained with the train command
        historic_data_filename: str
            The path to the historic data file, i.e. real data up to the present/prediction start
        future_data_filename: str
            The path to the future data file, i.e. forecasted predictors for the future
        output_filename: str
            The path of the csv file the forecasts are written to
        """
        dataset = DataSet.from_csv(historic_data_filename, dc)
        # The future data holds predictors only, so it is parsed without the
        # disease_cases field
        future_dc = remove_field(dc, "disease_cases")
        future_data = DataSet.from_csv(future_data_filename, future_dc)
        predictor = estimator.load_predictor(model_filename)
        forecasts = predictor.predict(dataset, future_data)
        forecasts.to_csv(output_filename)

    return app




# [docs]
def generate_template_app(model_template: InternalModelTemplate, name: str = "default"):
    """
    Build a command line app for a model template

    The app gets three commands: ``train``, ``predict`` and
    ``write_template_yaml``. ``train`` and ``predict`` create the estimator
    with ``model_template.get_model`` from an optional yaml model configuration.

    Parameters
    ----------
    model_template: InternalModelTemplate
        The template used to create the estimator and to look up the template info
    name: str
        The name put into the template info printed by ``write_template_yaml``

    Returns
    -------
    tuple
        The app followed by the ``train``, ``predict`` and
        ``write_template_yaml`` commands registered on it
    """
    app = App()

    def _get_dataclass(estimator):
        # The data columns are the covariates of the estimator plus the target of the template
        data_fields = estimator.covariate_names + [model_template.model_template_info.target]
        return create_tsdataclass(data_fields)

    def _read_model_config(model_config_path):
        # Without a path an empty configuration is validated
        model_config = None
        if model_config_path is not None:
            with open(model_config_path, "r", encoding="utf-8") as file:
                model_config = yaml.safe_load(file)
        # safe_load gives None for an empty yaml document; treat it like a missing file
        if model_config is None:
            model_config = {}
        return ModelConfiguration.model_validate(model_config)

    @app.command()
    def train(training_data_filename: str, model_path: str, model_config_path: str | None = None):
        """
        Train a model using historic data

        Parameters
        ----------
        training_data_filename: str
            The path to the training data file
        model_path: str
            The path to save the trained model
        model_config_path: str
            The path to a yaml file with the model configuration. An empty configuration is used if not given
        """
        model_config = _read_model_config(model_config_path)

        # TODO: create method in ModelTemplate to get the actual fields
        # Or give the model the responsibility
        estimator = model_template.get_model(model_config)
        dc = _get_dataclass(estimator)
        dataset = DataSet.from_csv(training_data_filename, dc)

        predictor = estimator.train(dataset)
        predictor.save(model_path)

    # TODO: send in model config again here
    @app.command()
    def predict(
        model_filename: str,
        historic_data_filename: str,
        future_data_filename: str,
        output_filename: str,
        model_config_path: str | None = None,
    ):
        """
        Predict using a trained model

        Parameters
        ----------
        model_filename: str
            The path to the model file trained with the train command
        historic_data_filename: str
            The path to the historic data file, i.e. real data up to the present/prediction start
        future_data_filename: str
            The path to the future data file, i.e. forecasted predictors for the future
        output_filename: str
            The path of the csv file the forecasts are written to
        model_config_path: str
            The path to a yaml file with the model configuration. An empty configuration is used if not given
        """
        model_config = _read_model_config(model_config_path)

        estimator = model_template.get_model(model_config)
        dc = _get_dataclass(estimator)
        # Remove the same target field that _get_dataclass added, so the
        # future data is parsed without the column that is being predicted
        future_dc = remove_field(dc, model_template.model_template_info.target)
        predictor = estimator.load_predictor(model_filename)

        dataset = DataSet.from_csv(historic_data_filename, dc)
        future_data = DataSet.from_csv(future_data_filename, future_dc)

        forecasts = predictor.predict(dataset, future_data)
        forecasts.to_csv(output_filename)

    @app.command()
    def write_template_yaml(pyenv_filename: str | None = None):
        """
        Print the model template info and the runner configuration as yaml

        Parameters
        ----------
        pyenv_filename: str
            The value stored as python_env in the runner configuration
        """
        runner_config = RunnerConfig(
            entry_points=EntryPointConfig(
                train=CommandConfig(
                    command="python main.py train {train_data} {model} {model_config}",
                    parameters={"train_data": "str", "model": "str", "model_config": "str"},
                ),
                predict=CommandConfig(
                    command="python main.py predict {model} {historic_data} {future_data} {out_file} {model_config}",
                    parameters={
                        "model": "str",
                        "historic_data": "str",
                        "future_data": "str",
                        "out_file": "str",
                        "model_config": "str",
                    },
                ),
            )
        )
        info = model_template.model_template_info.model_dump(mode="json") | {"name": name}
        # Assigned after construction, as before, so the dump below (exclude_unset=True) is unchanged
        runner_config.python_env = pyenv_filename

        print(yaml.dump(info))
        print(yaml.dump(runner_config.model_dump(exclude_unset=True), sort_keys=False))

    return app, train, predict, write_template_yaml