Skip to content

Tracking API Reference

The tracking module implements experiment management. The DualTracker allows for simultaneous logging to local JSON/TensorBoard files and a remote or local MLflow server, ensuring both real-time visualization and long-term experiment comparison.

base

Base class and registry for experiment trackers.

Classes

BaseTracker

Bases: ABC

Abstract base for experiment tracking.

All trackers must implement: - start_run: Begin a new experiment run - log_params: Log hyperparameters - log_metrics: Log metrics (optionally at specific step) - log_artifact: Log file artifacts - end_run: Finalize the run

Functions
end_run abstractmethod
end_run() -> None

End the current run.

Source code in src/tracking/base.py
@abstractmethod
def end_run(self) -> None:
    """Finalize and close the currently active run."""
    ...
log_artifact abstractmethod
log_artifact(path: Path, name: Optional[str] = None) -> None

Log a file artifact.

Parameters:

Name Type Description Default
path Path

Path to the artifact file

required
name Optional[str]

Optional name for the artifact

None
Source code in src/tracking/base.py
@abstractmethod
def log_artifact(self, path: Path, name: Optional[str] = None) -> None:
    """Persist a file artifact for the current run.

    Args:
        path: Path to the artifact file
        name: Optional name for the artifact
    """
    ...
log_metrics abstractmethod
log_metrics(metrics: Dict[str, float], step: Optional[int] = None) -> None

Log metrics (optionally at a specific step).

Parameters:

Name Type Description Default
metrics Dict[str, float]

Dictionary of metric name to value

required
step Optional[int]

Optional step/epoch number

None
Source code in src/tracking/base.py
@abstractmethod
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
    """Record metric values, optionally attached to a step/epoch.

    Args:
        metrics: Dictionary of metric name to value
        step: Optional step/epoch number
    """
    ...
log_params abstractmethod
log_params(params: Dict[str, Any]) -> None

Log hyperparameters.

Parameters:

Name Type Description Default
params Dict[str, Any]

Dictionary of parameters

required
Source code in src/tracking/base.py
@abstractmethod
def log_params(self, params: Dict[str, Any]) -> None:
    """Record hyperparameters for the current run.

    Args:
        params: Dictionary of parameters
    """
    ...
start_run abstractmethod
start_run(run_name: str, config: Dict[str, Any]) -> str

Start a new run, return run_id.

Parameters:

Name Type Description Default
run_name str

Name for this run

required
config Dict[str, Any]

Configuration dictionary

required

Returns:

Type Description
str

Unique run identifier

Source code in src/tracking/base.py
@abstractmethod
def start_run(self, run_name: str, config: Dict[str, Any]) -> str:
    """Begin a new experiment run and return its identifier.

    Args:
        run_name: Name for this run
        config: Configuration dictionary

    Returns:
        Unique run identifier
    """
    ...

TrackerRegistry

Registry for tracker classes.

Functions
get classmethod
get(name: str, **kwargs) -> BaseTracker

Get a tracker instance by name.

Parameters:

Name Type Description Default
name str

Registry name

required
**kwargs

Arguments for tracker constructor

{}

Returns:

Type Description
BaseTracker

Tracker instance

Source code in src/tracking/base.py
@classmethod
def get(cls, name: str, **kwargs) -> BaseTracker:
    """Instantiate a registered tracker by its registry name.

    Args:
        name: Registry name
        **kwargs: Arguments for tracker constructor

    Returns:
        Tracker instance
    """
    tracker_cls = cls._trackers.get(name)
    if tracker_cls is None:
        # Surface the valid choices in the error for quick diagnosis.
        available = list(cls._trackers.keys())
        raise ValueError(f"Unknown tracker: {name}. Available: {available}")
    return tracker_cls(**kwargs)
list_available classmethod
list_available() -> list

List all registered tracker names.

Source code in src/tracking/base.py
@classmethod
def list_available(cls) -> list:
    """Return the names of every registered tracker."""
    return [*cls._trackers]
register classmethod
register(name: str)

Decorator to register a tracker class.

Parameters:

Name Type Description Default
name str

Registry name

required

Returns:

Type Description

Decorator function

Source code in src/tracking/base.py
@classmethod
def register(cls, name: str):
    """Build a class decorator that records a tracker under *name*.

    Args:
        name: Registry name

    Returns:
        Decorator function
    """
    def _add_to_registry(tracker_class):
        # Store the class (not an instance) so get() can construct later.
        cls._trackers[name] = tracker_class
        return tracker_class
    return _add_to_registry

local

Local file-based tracking (JSON + TensorBoard).

Classes

LocalTracker

LocalTracker(base_dir: str = 'artifacts/runs', use_tensorboard: bool = True)

Bases: BaseTracker

Local file-based tracking (JSON + TensorBoard).

Outputs to: artifacts/runs/&lt;run_id&gt;/ - config.json - metrics.json - step_metrics.csv - tensorboard/ - artifacts/

Example usage

tracker = LocalTracker() run_id = tracker.start_run("experiment_1", {"lr": 0.001}) tracker.log_metrics({"loss": 0.5}, step=1) tracker.end_run()

Initialize the tracker.

Parameters:

Name Type Description Default
base_dir str

Base directory for run outputs

'artifacts/runs'
use_tensorboard bool

Whether to log to TensorBoard

True
Source code in src/tracking/local.py
def __init__(self, base_dir: str = "artifacts/runs", use_tensorboard: bool = True):
    """Set up a local tracker rooted at *base_dir*.

    Args:
        base_dir: Base directory for run outputs
        use_tensorboard: Whether to log to TensorBoard
    """
    self.base_dir = Path(base_dir)
    self.use_tensorboard = use_tensorboard

    # Per-run state; populated by start_run().
    self.run_dir: Optional[Path] = None
    self.run_id: Optional[str] = None
    self.config: Dict[str, Any] = {}
    self.metrics: Dict[str, Any] = {}
    self.step_metrics: list = []
    self.tb_writer = None
Functions
end_run
end_run() -> None

End the current run.

Source code in src/tracking/local.py
def end_run(self) -> None:
    """End the current run.

    Persists accumulated step metrics (CSV, falling back to JSON when
    pandas is unavailable) and closes the TensorBoard writer.
    """
    # Guard: end_run() before start_run() would crash on run_dir=None.
    if self.run_dir is None:
        logger.warning("end_run called with no active run")
        return

    # Save step metrics as CSV
    if self.step_metrics:
        try:
            import pandas as pd
            df = pd.DataFrame(self.step_metrics)
            df.to_csv(self.run_dir / "step_metrics.csv", index=False)
        except ImportError:
            # Fallback to JSON
            with open(self.run_dir / "step_metrics.json", 'w') as f:
                json.dump(self.step_metrics, f, indent=2)

    if self.tb_writer:
        self.tb_writer.close()
        # Drop the closed writer so a repeated end_run is harmless.
        self.tb_writer = None

    logger.info(f"Run saved to: {self.run_dir}")
get_run_dir
get_run_dir() -> Optional[Path]

Get current run directory.

Source code in src/tracking/local.py
def get_run_dir(self) -> Optional[Path]:
    """Return the directory of the active run (None before start_run)."""
    current = self.run_dir
    return current
log_artifact
log_artifact(path: Path, name: Optional[str] = None) -> None

Log a file artifact.

Parameters:

Name Type Description Default
path Path

Path to artifact

required
name Optional[str]

Optional name

None
Source code in src/tracking/local.py
def log_artifact(self, path: Path, name: Optional[str] = None) -> None:
    """Copy a file into this run's artifacts/ directory.

    Args:
        path: Path to artifact
        name: Optional name
    """
    destination = self.run_dir / "artifacts"
    destination.mkdir(exist_ok=True)

    # Fall back to the source filename when no explicit name is given.
    dest_name = name or Path(path).name
    shutil.copy(path, destination / dest_name)
    logger.debug(f"Logged artifact: {dest_name}")
log_figure
log_figure(figure, name: str) -> None

Log a matplotlib figure.

Parameters:

Name Type Description Default
figure

Matplotlib figure

required
name str

Figure name

required
Source code in src/tracking/local.py
def log_figure(self, figure, name: str) -> None:
    """Save a matplotlib figure under figures/ and mirror it to TensorBoard.

    Args:
        figure: Matplotlib figure
        name: Figure name
    """
    out_dir = self.run_dir / "figures"
    out_dir.mkdir(exist_ok=True)

    figure.savefig(out_dir / f"{name}.png", dpi=150, bbox_inches='tight')

    if self.tb_writer:
        self.tb_writer.add_figure(name, figure)
log_metrics
log_metrics(metrics: Dict[str, float], step: Optional[int] = None) -> None

Log metrics.

Parameters:

Name Type Description Default
metrics Dict[str, float]

Metric name to value mapping

required
step Optional[int]

Optional step number

None
Source code in src/tracking/local.py
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
    """Record metrics, persist them, and mirror stepped values to TensorBoard.

    Args:
        metrics: Metric name to value mapping
        step: Optional step number
    """
    # Fold into the cumulative snapshot of final metric values.
    self.metrics.update(metrics)

    # Persist the snapshot on every call so a crash loses nothing.
    with open(self.run_dir / "metrics.json", 'w') as f:
        json.dump(self.metrics, f, indent=2)

    if step is not None:
        # Stepped metrics also go to TensorBoard and the step history.
        if self.tb_writer:
            for key, value in metrics.items():
                self.tb_writer.add_scalar(key, value, step)
        self.step_metrics.append({'step': step, **metrics})
log_params
log_params(params: Dict[str, Any]) -> None

Log additional parameters.

Parameters:

Name Type Description Default
params Dict[str, Any]

Parameters to log

required
Source code in src/tracking/local.py
def log_params(self, params: Dict[str, Any]) -> None:
    """Merge *params* into the stored config and rewrite config.json.

    Args:
        params: Parameters to log
    """
    self.config.update(params)
    config_path = self.run_dir / "config.json"
    # default=str keeps non-JSON-serializable values (Paths, etc.) loggable.
    with open(config_path, 'w') as f:
        json.dump(self.config, f, indent=2, default=str)
start_run
start_run(run_name: str, config: Dict[str, Any]) -> str

Start a new run.

Parameters:

Name Type Description Default
run_name str

Name for this run

required
config Dict[str, Any]

Configuration dictionary

required

Returns:

Type Description
str

Run ID

Source code in src/tracking/local.py
def start_run(self, run_name: str, config: Dict[str, Any]) -> str:
    """Start a new run.

    Args:
        run_name: Name for this run
        config: Configuration dictionary

    Returns:
        Run ID
    """
    # A timestamp suffix keeps repeated runs with the same name distinct.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    self.run_id = f"{run_name}_{stamp}"
    self.run_dir = self.base_dir / self.run_id
    self.run_dir.mkdir(parents=True, exist_ok=True)

    # Reset per-run state.
    self.config = config
    self.metrics = {}
    self.step_metrics = []

    # Persist the configuration immediately.
    with open(self.run_dir / "config.json", 'w') as f:
        json.dump(config, f, indent=2, default=str)

    # TensorBoard is optional; degrade gracefully if torch is missing.
    if self.use_tensorboard:
        try:
            from torch.utils.tensorboard import SummaryWriter
            self.tb_writer = SummaryWriter(self.run_dir / "tensorboard")
            logger.info(f"TensorBoard logging to: {self.run_dir / 'tensorboard'}")
        except ImportError:
            logger.warning("TensorBoard not available")
            self.tb_writer = None

    logger.info(f"Started run: {self.run_id}")
    return self.run_id

mlflow_tracker

MLflow-based experiment tracking.

Classes

MLflowTracker

MLflowTracker(tracking_uri: str = 'file:./artifacts/mlruns', experiment_name: str = 'battery_degradation')

Bases: BaseTracker

MLflow-based tracking.

Provides: - Centralized experiment tracking - Model versioning - Artifact storage - UI for experiment comparison

Example usage

tracker = MLflowTracker(tracking_uri="file:./mlruns") run_id = tracker.start_run("experiment_1", {"lr": 0.001}) tracker.log_metrics({"loss": 0.5}, step=1) tracker.end_run()

Initialize the tracker.

Parameters:

Name Type Description Default
tracking_uri str

MLflow tracking URI

'file:./artifacts/mlruns'
experiment_name str

Experiment name

'battery_degradation'
Source code in src/tracking/mlflow_tracker.py
def __init__(self,
             tracking_uri: str = "file:./artifacts/mlruns",
             experiment_name: str = "battery_degradation"):
    """Connect to MLflow and select the target experiment.

    Args:
        tracking_uri: MLflow tracking URI
        experiment_name: Experiment name

    Raises:
        ImportError: If the mlflow package is not installed.
    """
    # Import lazily so the module loads even without mlflow installed.
    try:
        import mlflow
    except ImportError:
        raise ImportError("mlflow required. Install with: pip install mlflow")
    self.mlflow = mlflow

    self.mlflow.set_tracking_uri(tracking_uri)
    self.mlflow.set_experiment(experiment_name)

    self.tracking_uri = tracking_uri
    self.experiment_name = experiment_name
    self.run = None
    self.run_id: Optional[str] = None

    logger.info(f"MLflow tracking URI: {tracking_uri}")
Functions
end_run
end_run() -> None

End the current run.

Source code in src/tracking/mlflow_tracker.py
def end_run(self) -> None:
    """End the current MLflow run, tolerating backend failures."""
    if self.run is None:
        # Nothing to close.
        return
    try:
        self.mlflow.end_run()
        logger.info(f"Ended MLflow run: {self.run_id}")
    except Exception as e:
        logger.warning(f"Failed to end MLflow run: {e}")
    finally:
        # Clear run state even when ending failed.
        self.run = None
        self.run_id = None
get_run_id
get_run_id() -> Optional[str]

Get current run ID.

Source code in src/tracking/mlflow_tracker.py
def get_run_id(self) -> Optional[str]:
    """Return the identifier of the active MLflow run, if any."""
    active_id = self.run_id
    return active_id
log_artifact
log_artifact(path: Path, name: Optional[str] = None) -> None

Log a file artifact.

Parameters:

Name Type Description Default
path Path

Path to artifact

required
name Optional[str]

Optional subfolder name

None
Source code in src/tracking/mlflow_tracker.py
def log_artifact(self, path: Path, name: Optional[str] = None) -> None:
    """Forward a file artifact to MLflow (no-op without an active run).

    Args:
        path: Path to artifact
        name: Optional subfolder name
    """
    if self.run is None or self.mlflow.active_run() is None:
        logger.warning("No active MLflow run. Cannot log artifact.")
        return

    # Pass the subfolder only when one was supplied.
    args = (str(path), name) if name else (str(path),)
    try:
        self.mlflow.log_artifact(*args)
    except Exception as e:
        logger.warning(f"Failed to log artifact to MLflow: {e}")
log_metrics
log_metrics(metrics: Dict[str, float], step: Optional[int] = None) -> None

Log metrics.

Parameters:

Name Type Description Default
metrics Dict[str, float]

Metric name to value mapping

required
step Optional[int]

Optional step number

None
Source code in src/tracking/mlflow_tracker.py
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
    """Forward metrics to MLflow (no-op without an active run).

    Args:
        metrics: Metric name to value mapping
        step: Optional step number
    """
    if self.run is None or self.mlflow.active_run() is None:
        logger.warning("No active MLflow run. Cannot log metrics.")
        return

    try:
        self.mlflow.log_metrics(metrics, step=step)
    except Exception as e:
        logger.warning(f"Failed to log metrics to MLflow: {e}")
log_model
log_model(model, artifact_path: str = 'model') -> None

Log a PyTorch model.

Parameters:

Name Type Description Default
model

PyTorch model

required
artifact_path str

Path in artifact store

'model'
Source code in src/tracking/mlflow_tracker.py
def log_model(self, model, artifact_path: str = "model") -> None:
    """Log a PyTorch model to MLflow.

    Tries the mlflow.pytorch flavor first; on failure falls back to
    saving the raw state_dict as a plain artifact.

    Args:
        model: PyTorch model
        artifact_path: Path in artifact store
    """
    try:
        self.mlflow.pytorch.log_model(model, artifact_path)
    except Exception as e:
        # Fall back to saving state dict
        logger.warning(f"Failed to log model with pytorch flavor: {e}")
        import os
        import tempfile
        import torch
        # mkstemp + explicit remove: the original NamedTemporaryFile with
        # delete=False leaked the temp file and re-opening it by name while
        # still open fails on Windows.
        fd, tmp_path = tempfile.mkstemp(suffix='.pt')
        os.close(fd)
        try:
            torch.save(model.state_dict(), tmp_path)
            self.mlflow.log_artifact(tmp_path, artifact_path)
        finally:
            os.remove(tmp_path)
log_params
log_params(params: Dict[str, Any]) -> None

Log additional parameters.

Parameters:

Name Type Description Default
params Dict[str, Any]

Parameters to log

required
Source code in src/tracking/mlflow_tracker.py
def log_params(self, params: Dict[str, Any]) -> None:
    """Forward parameters to MLflow (no-op without an active run).

    Args:
        params: Parameters to log
    """
    if self.run is None or self.mlflow.active_run() is None:
        logger.warning("No active MLflow run. Cannot log params.")
        return

    # Flatten nested dicts; MLflow caps param values at 500 characters.
    for key, value in self._flatten_dict(params).items():
        try:
            self.mlflow.log_param(key, str(value)[:500])
        except Exception as e:
            logger.warning(f"Failed to log param {key}: {e}")
start_run
start_run(run_name: str, config: Dict[str, Any]) -> str

Start a new run.

Parameters:

Name Type Description Default
run_name str

Name for this run

required
config Dict[str, Any]

Configuration dictionary

required

Returns:

Type Description
str

Run ID

Source code in src/tracking/mlflow_tracker.py
def start_run(self, run_name: str, config: Dict[str, Any]) -> str:
    """Start a new run.

    Args:
        run_name: Name for this run
        config: Configuration dictionary

    Returns:
        Run ID
    """
    # A dangling previous run would make start_run fail; close it first.
    if self.run is not None:
        try:
            self.mlflow.end_run()
        except Exception:
            pass

    try:
        self.run = self.mlflow.start_run(run_name=run_name)
        self.run_id = self.run.info.run_id

        # Flatten nested config and log each entry, truncating values to
        # MLflow's 500-character parameter limit.
        for key, value in self._flatten_dict(config).items():
            text = str(value)
            if len(text) > 500:
                text = text[:497] + "..."
            try:
                self.mlflow.log_param(key, text)
            except Exception as e:
                logger.warning(f"Failed to log param {key}: {e}")

        logger.info(f"Started MLflow run: {self.run_id}")
        return self.run_id
    except Exception as e:
        # Leave the tracker in a clean state before propagating.
        logger.error(f"Failed to start MLflow run: {e}")
        self.run = None
        self.run_id = None
        raise

dual_tracker

Dual tracker that logs to both local and MLflow simultaneously.

Classes

DualTracker

DualTracker(local_base_dir: str = 'artifacts/runs', use_tensorboard: bool = True, mlflow_tracking_uri: str = 'file:./artifacts/mlruns', mlflow_experiment_name: str = 'battery_degradation')

Bases: BaseTracker

Tracker that logs to both local files and MLflow.

Provides the best of both worlds: - Local: Fast access, TensorBoard, works offline - MLflow: Centralized UI, model registry, comparison

Example usage

tracker = DualTracker() run_id = tracker.start_run("experiment_1", {"lr": 0.001}) tracker.log_metrics({"loss": 0.5}, step=1) tracker.end_run()

Initialize dual tracker.

Parameters:

Name Type Description Default
local_base_dir str

Base directory for local runs

'artifacts/runs'
use_tensorboard bool

Whether to enable TensorBoard

True
mlflow_tracking_uri str

MLflow tracking URI

'file:./artifacts/mlruns'
mlflow_experiment_name str

MLflow experiment name

'battery_degradation'
Source code in src/tracking/dual_tracker.py
def __init__(self,
             local_base_dir: str = "artifacts/runs",
             use_tensorboard: bool = True,
             mlflow_tracking_uri: str = "file:./artifacts/mlruns",
             mlflow_experiment_name: str = "battery_degradation"):
    """Initialize dual tracker.

    Args:
        local_base_dir: Base directory for local runs
        use_tensorboard: Whether to enable TensorBoard
        mlflow_tracking_uri: MLflow tracking URI
        mlflow_experiment_name: MLflow experiment name
    """
    # Local tracking always works; it has no optional dependencies.
    self.local = LocalTracker(base_dir=local_base_dir,
                              use_tensorboard=use_tensorboard)

    # MLflow is optional: degrade gracefully when it is not installed.
    try:
        self.mlflow = MLflowTracker(tracking_uri=mlflow_tracking_uri,
                                    experiment_name=mlflow_experiment_name)
    except ImportError:
        logger.warning("MLflow not available, using local tracking only")
        self.mlflow = None
        self._has_mlflow = False
    else:
        self._has_mlflow = True

    self.run_id: Optional[str] = None
Functions
end_run
end_run() -> None

End run on both backends.

Source code in src/tracking/dual_tracker.py
def end_run(self) -> None:
    """End run on both backends."""
    # Local shutdown happens regardless of MLflow state.
    self.local.end_run()

    if not self._has_mlflow:
        return
    try:
        self.mlflow.end_run()
    except Exception as e:
        logger.warning(f"Failed to end MLflow run: {e}")
get_run_dir
get_run_dir() -> Optional[Path]

Get local run directory.

Source code in src/tracking/dual_tracker.py
def get_run_dir(self) -> Optional[Path]:
    """Delegate to the local backend's run directory."""
    return self.local.get_run_dir()
log_artifact
log_artifact(path: Path, name: Optional[str] = None) -> None

Log artifact to both backends.

Parameters:

Name Type Description Default
path Path

Path to artifact

required
name Optional[str]

Optional name

None
Source code in src/tracking/dual_tracker.py
def log_artifact(self, path: Path, name: Optional[str] = None) -> None:
    """Log artifact to both backends.

    Args:
        path: Path to artifact
        name: Optional name
    """
    # Local logging first; MLflow failure must not affect it.
    self.local.log_artifact(path, name)

    if not self._has_mlflow:
        return
    try:
        self.mlflow.log_artifact(path, name)
    except Exception as e:
        logger.warning(f"Failed to log artifact to MLflow: {e}")
log_figure
log_figure(figure, name: str) -> None

Log matplotlib figure.

Parameters:

Name Type Description Default
figure

Matplotlib figure

required
name str

Figure name

required
Source code in src/tracking/dual_tracker.py
def log_figure(self, figure, name: str) -> None:
    """Log matplotlib figure (local backend only).

    Args:
        figure: Matplotlib figure
        name: Figure name
    """
    self.local.log_figure(figure, name)
log_metrics
log_metrics(metrics: Dict[str, float], step: Optional[int] = None) -> None

Log metrics to both backends.

Parameters:

Name Type Description Default
metrics Dict[str, float]

Metric name to value mapping

required
step Optional[int]

Optional step number

None
Source code in src/tracking/dual_tracker.py
def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
    """Log metrics to both backends.

    Args:
        metrics: Metric name to value mapping
        step: Optional step number
    """
    # Local logging first; MLflow failure must not affect it.
    self.local.log_metrics(metrics, step)

    if not self._has_mlflow:
        return
    try:
        self.mlflow.log_metrics(metrics, step)
    except Exception as e:
        logger.warning(f"Failed to log metrics to MLflow: {e}")
log_model
log_model(model, artifact_path: str = 'model') -> None

Log PyTorch model to MLflow.

Parameters:

Name Type Description Default
model

PyTorch model

required
artifact_path str

Artifact path

'model'
Source code in src/tracking/dual_tracker.py
def log_model(self, model, artifact_path: str = "model") -> None:
    """Log PyTorch model to MLflow (no local equivalent exists).

    Args:
        model: PyTorch model
        artifact_path: Artifact path
    """
    if not self._has_mlflow:
        return
    try:
        self.mlflow.log_model(model, artifact_path)
    except Exception as e:
        logger.warning(f"Failed to log model to MLflow: {e}")
log_params
log_params(params: Dict[str, Any]) -> None

Log parameters to both backends.

Parameters:

Name Type Description Default
params Dict[str, Any]

Parameters to log

required
Source code in src/tracking/dual_tracker.py
def log_params(self, params: Dict[str, Any]) -> None:
    """Log parameters to both backends.

    Args:
        params: Parameters to log
    """
    # Local logging first; MLflow failure must not affect it.
    self.local.log_params(params)

    if not self._has_mlflow:
        return
    try:
        self.mlflow.log_params(params)
    except Exception as e:
        logger.warning(f"Failed to log params to MLflow: {e}")
start_run
start_run(run_name: str, config: Dict[str, Any]) -> str

Start a run on both backends.

Parameters:

Name Type Description Default
run_name str

Name for this run

required
config Dict[str, Any]

Configuration dictionary

required

Returns:

Type Description
str

Local run ID

Source code in src/tracking/dual_tracker.py
def start_run(self, run_name: str, config: Dict[str, Any]) -> str:
    """Start a run on both backends.

    Args:
        run_name: Name for this run
        config: Configuration dictionary

    Returns:
        Local run ID
    """
    # The local run id is the canonical one returned to callers.
    local_id = self.local.start_run(run_name, config)
    self.run_id = local_id

    if not self._has_mlflow:
        return local_id

    # MLflow is best-effort: a failure here must not abort the run.
    try:
        mlflow_run_id = self.mlflow.start_run(run_name, config)
        logger.info(f"Dual tracking: local={local_id}, mlflow={mlflow_run_id}")
    except Exception as e:
        logger.warning(f"Failed to start MLflow run: {e}")

    return local_id