Source code for chap_core.runners.helper_functions
import logging
from typing import Literal, Optional
from chap_core.external.model_configuration import ModelTemplateConfigV2
from chap_core.models.model_template import ModelConfiguration
from chap_core.runners.command_line_runner import CommandLineRunner, CommandLineTrainPredictRunner
from chap_core.runners.docker_runner import DockerRunner, DockerTrainPredictRunner
from chap_core.runners.mlflow_runner import MlFlowTrainPredictRunner
from chap_core.runners.runner import TrainPredictRunner
import yaml
from pathlib import Path
logger = logging.getLogger(__name__)
[docs]
def get_train_predict_runner_from_model_template_config(
    model_template_config: ModelTemplateConfigV2,
    working_dir: Path,
    skip_environment=False,
    model_configuration: Optional["ModelConfiguration"] = None,
) -> TrainPredictRunner:
    """
    Utility function that returns a suitable runner for a model given a ModelTemplateConfig (which
    contains information about what runner the template says that its models should use).

    The runner kind is derived from the template config:

    - ``docker_env`` is set: a DockerTrainPredictRunner is returned (unless ``skip_environment`` is true).
    - otherwise ``python_env`` is set: an MlFlowTrainPredictRunner is returned (unless
      ``skip_environment`` is true).
    - neither is set: ``skip_environment`` is forced to True.

    Whenever ``skip_environment`` is true, a CommandLineTrainPredictRunner is returned instead.

    As a side effect, the model configuration (or an empty mapping when none is given) is always written
    to ``model_configuration_for_run.yaml`` inside ``working_dir``; the file name is handed to the runner.

    Parameters:
        model_template_config: Template config providing ``docker_env``, ``python_env`` and ``entry_points``.
        working_dir: Directory the runner works in and where the configuration YAML file is written.
        skip_environment: If true, neither docker nor mlflow is used.
        model_configuration: Optional configuration; either a dict or an object exposing ``model_dump()``.

    Returns:
        The TrainPredictRunner selected as described above.
    """
    if model_template_config.docker_env is not None:
        runner_type = "docker"
    elif model_template_config.python_env is not None:
        runner_type = "mlflow"
    else:
        # The template declares no environment, so the commands are run directly.
        runner_type = ""
        skip_environment = True
    logger.info(f"skip_environement: {skip_environment}, runner_type: {runner_type}")
    logger.info(f"Model Configuration: {model_configuration}")

    yaml_filename = "model_configuration_for_run.yaml"
    model_configuration_file = working_dir / yaml_filename

    # Normalise the configuration to a plain dict *before* opening the file, so that a failure
    # while dumping the model does not leave an empty configuration file behind.
    model_configuration = model_configuration or {}
    if isinstance(model_configuration, dict):
        configuration_dict = model_configuration
    else:
        configuration_dict = model_configuration.model_dump()
    with open(model_configuration_file, "w") as file:
        yaml.dump(configuration_dict, file)

    if not skip_environment and runner_type != "docker":
        assert runner_type == "mlflow"
        return MlFlowTrainPredictRunner(
            working_dir,
            model_configuration_filename=yaml_filename,
            train_params=model_template_config.entry_points.train.parameters.keys(),
        )

    # The configuration file is passed to the runners by name. Appending it to the commands
    # as a ``--model_configuration`` argument is still under development.
    train_command = model_template_config.entry_points.train.command
    predict_command = model_template_config.entry_points.predict.command

    if skip_environment:
        return CommandLineTrainPredictRunner(
            CommandLineRunner(working_dir),
            train_command,
            predict_command,
            model_configuration_filename=yaml_filename,
        )

    assert model_template_config.docker_env is not None
    # Use the module-level logger (not the root logger) so this message follows the module's logging config.
    logger.info(f"Docker image is {model_template_config.docker_env.image}")
    command_runner = DockerRunner(model_template_config.docker_env.image, working_dir)
    return DockerTrainPredictRunner(command_runner, train_command, predict_command, yaml_filename)
[docs]
def get_train_predict_runner(
    mlproject_file: Path, runner_type: Literal["mlflow", "docker"], skip_environment=False
) -> TrainPredictRunner:
    """
    Returns a TrainPredictRunner based on the runner_type.

    - If ``skip_environment`` is true, neither mlflow nor docker is used; the train and predict commands
      are read from the mlproject file and a CommandLineTrainPredictRunner is returned.
    - If ``runner_type`` is "docker", the mlproject file is parsed and a DockerTrainPredictRunner using
      the image named under ``docker_env`` is returned.
    - If ``runner_type`` is "mlflow", an MlFlowTrainPredictRunner is returned.

    In all cases the runner works in the directory containing ``mlproject_file``.

    Parameters:
        mlproject_file: Path to the mlproject YAML file.
        runner_type: Either "mlflow" or "docker".
        skip_environment: If true, run the commands directly on the command line.

    Returns:
        The TrainPredictRunner selected as described above.
    """
    logger.info(f"skip_environement: {skip_environment}, runner_type: {runner_type}")
    working_dir = mlproject_file.parent

    if not skip_environment and runner_type != "docker":
        assert runner_type == "mlflow"
        return MlFlowTrainPredictRunner(working_dir)

    # Read the mlproject YAML file into a dict.
    # NOTE(review): yaml.safe_load is the recommended loader when the file may come from an
    # untrusted source; consider switching if mlproject files are not fully trusted.
    with open(mlproject_file, "r") as file:
        data = yaml.load(file, Loader=yaml.FullLoader)
    train_command = data["entry_points"]["train"]["command"]
    predict_command = data["entry_points"]["predict"]["command"]

    if skip_environment:
        return CommandLineTrainPredictRunner(CommandLineRunner(working_dir), train_command, predict_command)

    assert "docker_env" in data, "Runner type is docker, but no docker_env in mlproject file"
    docker_image = data["docker_env"]["image"]
    # Use the module-level logger (not the root logger) so this message follows the module's logging config.
    logger.info(f"Docker image is {docker_image}")
    command_runner = DockerRunner(docker_image, working_dir)
    return DockerTrainPredictRunner(command_runner, train_command, predict_command)