Source code for chap_core.hpo.searcher

from typing import Any, Iterator, Optional
import itertools
import random
import optuna
import math
from .base import Int, Float

_TRIAL_ID_KEY = "_trial_id"  # reserved key we inject into params


class Searcher:
    """Abstract optimizer interface.

    Implementations should:
    - call `reset(space)` before use
    - repeatedly return configurations via `ask()` until None (no more work)
    - receive feedback via `tell(params, result)`
    """

    def reset(self, space: Any) -> None: ...

    def ask(self) -> Optional[dict[str, Any]]: ...

    def tell(self, params: dict[str, Any], result: float) -> None: ...
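

# Illustrative driver loop (a minimal sketch, not part of the module's API):
# it wires the reset/ask/tell contract together. `objective` is a hypothetical
# callable mapping a params dict to a float score.
def _example_run_search(searcher: Searcher, space: Any, objective) -> None:
    searcher.reset(space)
    while (params := searcher.ask()) is not None:
        searcher.tell(params, objective(params))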


class GridSearcher(Searcher):
    def __init__(self):
        self._iterator: Optional[Iterator[tuple]] = None  # set in reset()

    def reset(self, search_space: dict[str, list]) -> None:
        self.keys = list(search_space.keys())
        self._iterator = itertools.product(*search_space.values())

    def ask(self) -> Optional[dict[str, Any]]:
        if self._iterator is None:
            raise RuntimeError("GridSearch not initialized. Call reset(params).")
        try:
            values = next(self._iterator)
        except StopIteration:
            return None
        params = dict(zip(self.keys, values))
        print(f"SEARCHER params: {params}")
        return params

    def tell(self, params: dict[str, Any], result: float) -> None:
        # Grid search doesn't adapt, but we keep the hook for API symmetry.
        return
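

# Usage sketch (hedged: the hyperparameter names are invented for
# illustration). The grid is the Cartesian product of the value lists, so
# this space yields 2 * 3 = 6 configurations before ask() returns None.
def _example_grid_search() -> int:
    searcher = GridSearcher()
    searcher.reset({"lr": [0.1, 0.01], "n_layers": [1, 2, 3]})
    count = 0
    while searcher.ask() is not None:
        count += 1
    return count  # 6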


class RandomSearcher(Searcher):
    """Sample up to `max_trials` configurations, with replacement."""

    def __init__(self, max_trials: int):
        if not isinstance(max_trials, int) or max_trials <= 0:
            raise ValueError("max_trials must be a positive integer")
        self.max_trials = max_trials
        self.rng: Optional[random.Random] = None  # set in reset()

    def reset(self, search_space: dict[str, Any], seed: Optional[int] = None) -> None:
        self.search_space = _validate_search_space_extended(search_space)
        print(f"RandomSearcher search space in reset: {self.search_space}")
        self.rng = random.Random(seed)
        self.keys = list(search_space.keys())
        self.emitted = 0

    def _sample_float(self, s: Float) -> float:
        if s.log:
            # Log-uniform: sample uniformly in log space, then exponentiate.
            low_log, high_log = math.log(s.low), math.log(s.high)
            u = self.rng.uniform(low_log, high_log)
            return math.exp(u)
        if s.step is None:
            return self.rng.uniform(s.low, s.high)
        # Discretized range: pick one of the n + 1 grid points low + k * step.
        n_float = (s.high - s.low) / s.step
        n = int(math.floor(n_float + 1e-12))  # small tolerance for float rounding
        k = self.rng.randint(0, n)
        return s.low + k * s.step

    def _sample_int(self, s: Int) -> int:
        if s.log:
            # +1 so that high itself can still be drawn after flooring.
            low_log, high_log = math.log(s.low), math.log(s.high + 1)
            u = self.rng.uniform(low_log, high_log)
            x = int(math.floor(math.exp(u)))
            return max(s.low, min(x, s.high))  # clamp floating-point edge cases
        if s.step == 1:
            return self.rng.randint(s.low, s.high)
        n = (s.high - s.low) // s.step
        k = self.rng.randint(0, n)
        return s.low + k * s.step

    def _sample_one(self, spec: Any) -> Any:
        if isinstance(spec, list):
            return self.rng.choice(spec)
        if isinstance(spec, Float):
            return self._sample_float(spec)
        if isinstance(spec, Int):
            return self._sample_int(spec)
        raise TypeError(f"Unsupported spec at runtime: {spec!r}")

    def ask(self) -> Optional[dict[str, Any]]:
        if self.rng is None:
            raise RuntimeError("RandomSearch not initialized. Call reset(search_space, seed)")
        if self.emitted >= self.max_trials:
            return None
        params = {k: self._sample_one(self.search_space[k]) for k in self.keys}
        self.emitted += 1
        return params

    def tell(self, params: dict[str, Any], result: float) -> None:
        # Random search doesn't adapt, but we keep the hook for API symmetry.
        return
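

# Usage sketch (hedged: assumes Float and Int accept the keyword arguments
# used by the validators below; the hyperparameter names are invented).
# With a fixed seed the five draws are reproducible.
def _example_random_search() -> list[dict[str, Any]]:
    searcher = RandomSearcher(max_trials=5)
    searcher.reset(
        {
            "lr": Float(low=1e-4, high=1e-1, step=None, log=True),
            "n_layers": Int(low=2, high=8, step=2, log=False),  # samples 2, 4, 6, or 8
            "activation": ["relu", "tanh"],
        },
        seed=42,
    )
    draws = []
    while (params := searcher.ask()) is not None:
        draws.append(params)
        searcher.tell(params, 0.0)  # feedback is ignored by random search
    return draws  # exactly 5 sampled configurations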


class TPESearcher(Searcher):
    """Tree-structured Parzen Estimator.

    Parallel-safe TPE searcher using Optuna's ask/tell with native
    distributions.

    - ask() returns a params dict that includes a reserved '_trial_id'.
    - tell() extracts '_trial_id' from params to update the correct trial.

    Supports:
    - list[...] -> CategoricalDistribution
    - Float(low, high, step=None|>0, log=bool) -> FloatDistribution
    - Int(low, high, step>=1, log=bool) -> IntDistribution
    """

    def __init__(self, direction: str = "minimize", max_trials: Optional[int] = None):
        if direction not in ("maximize", "minimize"):
            raise ValueError("direction must be 'maximize' or 'minimize'")
        self.direction = direction
        self.max_trials = max_trials
        self._pending: dict[int, optuna.trial.Trial] = {}
        self._study: Optional[optuna.study.Study] = None
        self._asked = 0

    def reset(self, search_space: dict[str, Any], seed: Optional[int] = None) -> None:
        search_space = _validate_search_space_extended(search_space)
        self._keys = list(search_space.keys())
        self._dists = {k: _to_optuna_distr(v) for k, v in search_space.items()}
        self._study = optuna.create_study(
            direction=self.direction,
            sampler=optuna.samplers.TPESampler(seed=seed),
        )
        self._pending.clear()
        self._asked = 0

    def ask(self) -> Optional[dict[str, Any]]:
        if self._study is None:
            raise RuntimeError("TPESearcher not initialized. Call reset(search_space, seed)")
        if self.max_trials is not None and self._asked >= self.max_trials:
            return None
        trial = self._study.ask(self._dists)
        self._pending[trial.number] = trial
        self._asked += 1
        params = dict(trial.params)
        params[_TRIAL_ID_KEY] = trial.number
        return params

    def tell(self, params: dict[str, Any], result: float) -> None:
        if _TRIAL_ID_KEY not in params:
            raise ValueError(f"params must include '{_TRIAL_ID_KEY}' returned by ask()")
        trial_id = params[_TRIAL_ID_KEY]
        trial = self._pending.pop(trial_id, None)
        if trial is None:
            raise KeyError(f"No pending trial with id {trial_id}")
        self._study.tell(trial, result)
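

# Usage sketch (hedged: the quadratic objective is made up for illustration).
# The '_trial_id' that ask() injects must travel with the params so tell()
# can route the result back to the matching Optuna trial; this is also what
# makes it safe to hold several asked-but-untold trials at once.
def _example_tpe_search() -> None:
    searcher = TPESearcher(direction="minimize", max_trials=20)
    searcher.reset({"x": Float(low=-10.0, high=10.0, step=None, log=False)}, seed=0)
    while (params := searcher.ask()) is not None:
        result = (params["x"] - 2.0) ** 2  # toy objective with minimum at x = 2
        searcher.tell(params, result)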


def validate_search_space(search_space: dict[str, list]) -> None:
    if not isinstance(search_space, dict) or not search_space:
        raise ValueError("search_space must be a non-empty dict[str, list]")
    for k, v in search_space.items():
        if not isinstance(v, list) or not v:
            raise ValueError(f"search_space['{k}'] must be a non-empty list; got {v!r}")


def _validate_search_space_extended(search_space: dict[str, Any]) -> dict[str, Any]:
    if not isinstance(search_space, dict) or not search_space:
        raise ValueError("search_space must be a non-empty dict")
    normalized: dict[str, Any] = {}
    for k, spec in search_space.items():
        print(f"key, spec in validate_space: {k}, {spec}")
        # Categorical
        if isinstance(spec, list):
            if not spec:
                raise ValueError(f"list for '{k}' must be non-empty")
            normalized[k] = list(spec)
            continue
        # Float
        if isinstance(spec, Float):
            low, high = float(spec.low), float(spec.high)
            if not (low < high):
                raise ValueError(f"Float('{k}'): low < high required")
            if spec.log:
                if spec.step is not None:
                    raise ValueError(f"Float('{k}'): step must be None when log=True")
                if low <= 0 or high <= 0:
                    raise ValueError(f"Float('{k}'): log=True requires low, high > 0")
            else:
                if spec.step is not None:
                    if not (isinstance(spec.step, (int, float)) and spec.step > 0):
                        raise ValueError(f"Float('{k}'): step must be > 0")
            normalized[k] = Float(low=low, high=high, step=spec.step, log=spec.log)
            continue
        # Int
        if isinstance(spec, Int):
            low, high, step = int(spec.low), int(spec.high), int(spec.step)
            if not (low <= high):
                raise ValueError(f"Int('{k}'): low <= high required")
            if step < 1:
                raise ValueError(f"Int('{k}'): step must be >= 1")
            if spec.log:
                if step != 1:
                    raise ValueError(f"Int('{k}'): step must be 1 when log=True")
                if low <= 0 or high <= 0:
                    raise ValueError(f"Int('{k}'): log=True requires low, high > 0")
            normalized[k] = Int(low=low, high=high, step=step, log=spec.log)
            continue
        raise ValueError(
            f"Unsupported spec for '{k}': expected list, Float, or Int; got {type(spec).__name__}"
        )
    return normalized


def _to_optuna_distr(spec: Any):
    """Convert our spec to an Optuna Distribution.

    Supports: list (categorical), Float, Int.
    """
    if isinstance(spec, list):
        if not spec:
            raise ValueError("categorical list must be non-empty")
        return optuna.distributions.CategoricalDistribution(tuple(spec))
    if isinstance(spec, Float):
        return optuna.distributions.FloatDistribution(
            low=spec.low,
            high=spec.high,
            step=spec.step,
            log=spec.log,
        )
    if isinstance(spec, Int):
        return optuna.distributions.IntDistribution(
            low=spec.low,
            high=spec.high,
            step=spec.step,
            log=spec.log,
        )
    raise TypeError(f"Unsupported spec type: {type(spec).__name__}. Expected list, Float, or Int.")
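

# Smoke-test sketch (hedged: the space below is arbitrary; run the module
# directly to see it). Exercises normalization and the spec -> Optuna
# distribution mapping end to end.
if __name__ == "__main__":
    space = {
        "lr": Float(low=1e-4, high=1e-1, step=None, log=True),
        "n_layers": Int(low=1, high=4, step=1, log=False),
        "activation": ["relu", "tanh"],
    }
    normalized = _validate_search_space_extended(space)
    for name, spec in normalized.items():
        print(name, type(_to_optuna_distr(spec)).__name__)
    # Expected: FloatDistribution, IntDistribution, CategoricalDistribution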