Source code for chap_core.runners.helper_functions

import logging
from typing import Literal, Optional
from chap_core.external.model_configuration import ModelTemplateConfigV2
from chap_core.models.model_template import ModelConfiguration
from chap_core.runners.command_line_runner import CommandLineRunner, CommandLineTrainPredictRunner
from chap_core.runners.docker_runner import DockerRunner, DockerTrainPredictRunner
from chap_core.runners.mlflow_runner import MlFlowTrainPredictRunner
from chap_core.runners.runner import TrainPredictRunner
import yaml
from pathlib import Path

logger = logging.getLogger(__name__)



[docs]
def get_train_predict_runner_from_model_template_config(
    model_template_config: ModelTemplateConfigV2,
    working_dir: Path,
    skip_environment: bool = False,
    model_configuration: Optional["ModelConfiguration"] = None,
) -> TrainPredictRunner:
    """
    Return a suitable TrainPredictRunner for a model, given its ModelTemplateConfig.

    The runner is chosen from the template config:

    - ``skip_environment`` is true: a CommandLineTrainPredictRunner is returned,
      regardless of which environment the template declares.
    - ``docker_env`` is set: a DockerTrainPredictRunner using that image is returned.
    - otherwise, ``python_env`` is set: an MlFlowTrainPredictRunner is returned.
    - neither is set: the environment is skipped and a CommandLineTrainPredictRunner
      is returned.

    Side effect: the model configuration is always written as YAML to
    ``working_dir / "model_configuration_for_run.yaml"`` and that filename is handed
    to the returned runner.

    Args:
        model_template_config: Template config holding the entry points and the
            optional ``docker_env`` / ``python_env`` sections.
        working_dir: Directory the runner works in; the configuration YAML is
            written here.
        skip_environment: If true, neither docker nor mlflow is used.
        model_configuration: Configuration to serialise for the run. ``None`` (or any
            falsy value) is written as an empty mapping; a plain ``dict`` is written
            as-is; anything else is serialised through its ``model_dump()`` method.

    Returns:
        The TrainPredictRunner selected as described above.
    """
    if model_template_config.docker_env is not None:
        runner_type = "docker"
    elif model_template_config.python_env is not None:
        runner_type = "mlflow"
    else:
        # No environment declared in the template: run the commands directly.
        runner_type = ""
        skip_environment = True

    logger.info(f"skip_environement: {skip_environment}, runner_type: {runner_type}")
    logger.info(f"Model Configuration: {model_configuration}")

    yaml_filename = "model_configuration_for_run.yaml"
    model_configuration_file = working_dir / yaml_filename

    # Build the serialisable dict before opening the file, so that a failing
    # model_dump() does not leave an empty configuration file behind.
    model_configuration = model_configuration or {}
    if isinstance(model_configuration, dict):
        configuration_dict = model_configuration
    else:
        configuration_dict = model_configuration.model_dump()
    with open(model_configuration_file, "w", encoding="utf-8") as file:
        yaml.dump(configuration_dict, file)

    if not skip_environment and runner_type != "docker":
        # assert model_configuration is None or model_configuration == {}, "ModelConfiguration (for templates) not supported when runner is mlflow for now"
        # Invariant: runner_type == "" forces skip_environment above, so only
        # "mlflow" can reach this point.
        assert runner_type == "mlflow"
        return MlFlowTrainPredictRunner(
            working_dir,
            model_configuration_filename=yaml_filename,
            train_params=model_template_config.entry_points.train.parameters.keys(),
        )

    train_command = model_template_config.entry_points.train.command
    predict_command = model_template_config.entry_points.predict.command

    # NOTE: the configuration file is not (yet) appended to the commands as a
    # "--model_configuration <file>" argument; only its filename is passed to the runner.
    if skip_environment:
        return CommandLineTrainPredictRunner(
            CommandLineRunner(working_dir),
            train_command,
            predict_command,
            model_configuration_filename=yaml_filename,
        )

    docker_env = model_template_config.docker_env
    # Invariant (and type narrowing): this branch is only reached when runner_type == "docker".
    assert docker_env is not None

    # Use the module-level logger (not the root logger) like the rest of this module.
    logger.info(f"Docker image is {docker_env.image}")
    command_runner = DockerRunner(docker_env.image, working_dir)
    return DockerTrainPredictRunner(command_runner, train_command, predict_command, yaml_filename)




[docs]
def get_train_predict_runner(
    mlproject_file: Path, runner_type: Literal["mlflow", "docker"], skip_environment: bool = False
) -> TrainPredictRunner:
    """
    Return a TrainPredictRunner for the model described by an MLproject file.

    - If ``skip_environment`` is true, neither mlflow nor docker is used: the train and
      predict commands are read from the MLproject file and a
      CommandLineTrainPredictRunner is returned.
    - If ``runner_type`` is "docker", the MLproject file is parsed for the commands and
      the ``docker_env`` image, and a DockerTrainPredictRunner is returned.
    - If ``runner_type`` is "mlflow", an MlFlowTrainPredictRunner is returned and the
      file itself is not read here.

    In all cases the runner works in the directory containing ``mlproject_file``.

    Args:
        mlproject_file: Path to the MLproject YAML file.
        runner_type: Either "mlflow" or "docker".
        skip_environment: If true, run the commands directly on the command line.

    Returns:
        The TrainPredictRunner selected as described above.

    Raises:
        AssertionError: If ``runner_type`` is "docker" but the file has no
            ``docker_env`` key, or if ``runner_type`` is neither "docker" nor "mlflow"
            (only while assertions are enabled, i.e. not under ``python -O``).
        KeyError: If the file lacks the ``entry_points`` train/predict commands.
    """
    logger.info(f"skip_environement: {skip_environment}, runner_type: {runner_type}")
    working_dir = mlproject_file.parent

    if not skip_environment and runner_type != "docker":
        assert runner_type == "mlflow"
        return MlFlowTrainPredictRunner(working_dir)

    # Read the MLproject file into a dict. YAML is read explicitly as UTF-8 so the
    # result does not depend on the platform's locale encoding.
    with open(mlproject_file, "r", encoding="utf-8") as file:
        # NOTE(review): FullLoader can construct more than plain data types; if
        # MLproject files may come from untrusted sources, consider yaml.safe_load.
        data = yaml.load(file, Loader=yaml.FullLoader)

    entry_points = data["entry_points"]
    train_command = entry_points["train"]["command"]
    predict_command = entry_points["predict"]["command"]

    if skip_environment:
        return CommandLineTrainPredictRunner(CommandLineRunner(working_dir), train_command, predict_command)

    assert "docker_env" in data, "Runner type is docker, but no docker_env in mlproject file"

    docker_image = data["docker_env"]["image"]
    # Use the module-level logger (not the root logger) like the rest of this module.
    logger.info(f"Docker image is {docker_image}")
    command_runner = DockerRunner(docker_image, working_dir)
    return DockerTrainPredictRunner(command_runner, train_command, predict_command)