Logging

Logging (payn.Logging.Logger)

Serves as the centralized logging component of the pipeline, interfacing directly with MLflow and Optuna to ensure full experiment reproducibility and transparency. It abstracts the complexity of artifact serialization (DataFrames to CSV buffers) and hyperparameter tracking. A minimal usage sketch follows the feature list below.

  • Parameter Logging: The log_model_hyperparameters function differentiates between user-defined overrides and model defaults, logging both (defaults prefixed with config_) to clarify exactly which parameters drove the model's behavior.
  • Artifact Serialization: Utilizes StringIO buffers to log pandas DataFrames directly to MLflow without creating intermediate temporary files on disk, reducing I/O overhead and filesystem clutter.
  • Reproducibility: Logs the exact state of the Optuna study (trials, best parameters, durations) and the resulting model attributes (feature importances, best iteration), allowing for the reconstruction of the optimization trajectory.
  • Data Snapshots: Captures specific slices of the data at critical checkpoints (e.g., log_spysplit_data, log_augmen_negatives) to verify the correctness of the PU split logic post-execution.
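
A minimal usage sketch (assumptions: the Logger is importable as payn.Logging.Logger as the heading suggests, an MLflow tracking destination is already set up, and the config keys and DataFrame contents are purely illustrative):

import mlflow
import pandas as pd
from payn.Logging import Logger

# Illustrative two-level config: top-level sections map to parameter dicts.
config = {
    "data": {"test_size": 0.2, "random_state": 42},
    "model": {"iterations": 500, "depth": 6},
}

logger = Logger(config)

with mlflow.start_run():
    # Every parameter is logged as "<section>.<key>" and echoed to stdout.
    logger.log_config_to_mlflow(print_config=True)

    # Any DataFrame snapshot can be attached as a CSV artifact.
    train = pd.DataFrame({"feature": [0.1, 0.4], "label": [1, 0]})
    logger.log_fold_data(train, train, train, fold_index=0)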

Central logging utility for the PAYN framework.

Handles the logging of:

  • Configuration parameters
  • Dataframes (as CSV artifacts)
  • Model hyperparameters and attributes (CatBoost)
  • Optimization studies (Optuna)
  • Evaluation metrics and thresholds

Attributes:

    config (Dict[str, Any]): The global configuration dictionary.

Source code in payn\Logging\logging.py
class Logger:
    """
    Central logging utility for the PAYN framework.

    Handles the logging of:
    - Configuration parameters
    - Dataframes (as CSV artifacts)
    - Model hyperparameters and attributes (CatBoost)
    - Optimization studies (Optuna)
    - Evaluation metrics and thresholds

    Attributes:
        config (Dict[str, Any]): The global configuration dictionary.
    """
    def __init__(self, config: dict) -> None:
        """
        Initialize the Logger.

        Args:
            config (Dict[str, Any]): Configuration dictionary.
        """
        self.config = config

    def log_config_to_mlflow(self, print_config: bool = True) -> None:
        """
        Log all configuration parameters to MLflow.

        Args:
            print_config (bool): If True, prints parameters to stdout.
        """
        for section, params in self.config.items():
            if print_config:
                print(f"{section}:")
            for key, value in params.items():
                mlflow.log_param(f"{section}.{key}", value)
                if print_config:
                    print(f"\t {key}: {value}")


    def _log_dataframe_as_artifact(self, dataframe: pd.DataFrame, artifact_name: str) -> None:
        """
        Log a DataFrame to MLflow as an artifact using an in-memory buffer.

        Args:
            dataframe (pd.DataFrame): DataFrame to log.
            artifact_name (str): Name of the artifact file (e.g. 'data.csv')
        """
        try:
            # Use StringIO to avoid writing to disk
            buffer = StringIO()
            dataframe.to_csv(buffer, index=False)
            buffer.seek(0)
            mlflow.log_text(buffer.getvalue(), artifact_name)
        except Exception as e:
            print(f"[ERROR] Failed logging artifact {artifact_name}: {e}")

    def log_fold_data(self, train_data: pd.DataFrame, val_data: pd.DataFrame, test_data: pd.DataFrame, fold_index: Optional[int] = None)-> None:
        """
        Log train, validation, and test splits as artifacts.

        Args:
            train_data (pd.DataFrame): Training dataset.
            val_data (pd.DataFrame): Validation dataset.
            test_data (pd.DataFrame): Test dataset.
            fold_index (int, optional): The index of the K-Fold split.
        """
        suffix = f"_fold_{fold_index}" if fold_index is not None else ""

        self._log_dataframe_as_artifact(train_data, f"train_data{suffix}.csv")
        self._log_dataframe_as_artifact(val_data, f"val_data{suffix}.csv")
        self._log_dataframe_as_artifact(test_data, f"test_data{suffix}.csv")

    def log_spysplit_data(self, train_data: pd.DataFrame, unlabeled_data: pd.DataFrame)-> None:
        """
        Log the initial PU split (Labeled Train vs. Unlabeled) as artifacts.

        Args:
            train_data (pd.DataFrame): Labeled positive training data.
            unlabeled_data (pd.DataFrame): Unlabeled data pool.
        """
        # Log datasets using the utility function
        self._log_dataframe_as_artifact(train_data, "train_data_spysplit.csv")
        self._log_dataframe_as_artifact(unlabeled_data, "unlabeled_data_spysplit.csv")

    def log_spy_infiltrated_data(self, spy_inf_train_data: pd.DataFrame, spies: pd.DataFrame)-> None:
        """
        Log spy-infiltrated training set as an artifact to MLflow.

        Args:
            spy_inf_train_data (pd.DataFrame): The combined training set (Positives + Unlabeled/Spies).
            spies (pd.DataFrame): The subset of Positives used as Spies.
        """
        self._log_dataframe_as_artifact(spy_inf_train_data, "spy_inf_train_data.csv")
        self._log_dataframe_as_artifact(spies, "spies.csv")

    def log_model_hyperparameters(self, model: Union[CatBoostClassifier, CatBoostRegressor], **kwargs: Any) -> None:
        """
        Log the hyperparameters of the model to MLflow.

        Logs both user-provided hyperparameters and the config hyperparameters of the CatBoost model.
        User-specified parameters are logged as-is, while non-overridden defaults are prefixed with 'config_'.

        Args:
            model (CatBoostClassifier): CatBoost model instance.
            **kwargs (Any): Additional keyword arguments for user-specified hyperparameters.
        """
        # Log user-provided hyperparameters
        valid_params = model.get_params().keys()
        filtered_kwargs = {k: v for k, v in kwargs.items() if k in valid_params}
        mlflow.log_params(filtered_kwargs)

        # Log config hyperparameters (not overridden by user)
        try:
            config_params = model.get_params()
            for key, value in config_params.items():
                if key not in filtered_kwargs:
                    mlflow.log_param(f"config_{key}", value)
        except Exception as e:
            print(f"Error logging config hyperparameters: {e}")

        # Log counts for transparency
        mlflow.log_param("user_provided_hyperparameters_count", len(filtered_kwargs))
        mlflow.log_param("config_hyperparameters_count", len(config_params) - len(filtered_kwargs))


    def log_model(self, model: Union[CatBoostClassifier, CatBoostRegressor], artifact_name: str) -> None:
        """
        Save and log a CatBoost model artifact.

        Args:
            model (Union[CatBoostClassifier, CatBoostRegressor]): The trained model.
            artifact_name (str): Filename for the saved model.
        """
        model.save_model(artifact_name)
        mlflow.log_artifact(artifact_name)


    def log_model_attributes(self, model: Union[CatBoostClassifier, CatBoostRegressor]) -> None:
        """
        Log internal attributes (best iteration, feature importance) of a trained model.

        Args:
            model (Union[CatBoostClassifier, CatBoostRegressor]): The trained model.
        """
        try:
            # Log best iteration and best score (if available)
            if hasattr(model, "best_iteration_"):
                mlflow.log_param("best_iteration", model.best_iteration_)
            if hasattr(model, "best_score_"):
                for dataset, metrics in model.best_score_.items():
                    for metric_name, value in metrics.items():
                        mlflow.log_metric(f"training_{dataset}_{metric_name}", value)

            if hasattr(model, "classes_"):
                mlflow.log_param("classes", model.classes_)

            # Log evaluation results
            if hasattr(model, "evals_result_"):
                evals_result = model.evals_result_
                for dataset, metrics in evals_result.items():
                    for metric_name, values in metrics.items():
                        mlflow.log_metric(f"{dataset}_{metric_name}_final", values[-1])

            # Log feature importance
            if hasattr(model, "feature_importances_"):
                feature_importances = pd.DataFrame(
                    {"Feature_Index": range(len(model.feature_importances_)), "Importance": model.feature_importances_}
                )
                self._log_dataframe_as_artifact(feature_importances, "feature_importances.csv")

            # Log number of features
            if hasattr(model, "n_features_in_"):
                mlflow.log_param("n_features", model.n_features_in_)
            # Log number of trees
            if hasattr(model, "tree_count_"):
                mlflow.log_param("tree_count", model.tree_count_)

        except Exception as e:
            print(f"Error logging model attributes: {e}")


    def log_evaluation_metrics(self, eval_result: dict) -> None:
        """
        Log evaluation metrics of the model to MLflow.

        Args:
            eval_result (dict): Evaluation results from test set.
        """
        for metric_name, values in eval_result.items():
            mlflow.log_metric(f"test_{metric_name}", values[-1])

    def log_optuna_study(self, study: optuna.Study) -> None:
        """
        Log Optuna study details to MLflow, including parameters, metrics, and a summary.

        Args:
            study (optuna.Study): The Optuna study object.
        """
        try:
            # Log best parameters and best value
            mlflow.log_params(study.best_params)
            mlflow.log_metric("best_value", study.best_value)

            # Log best trial details
            best_trial = study.best_trial
            mlflow.log_param("best_trial_number", best_trial.number)
            mlflow.log_param("best_trial_duration", str(best_trial.duration))
            for param_name, param_value in best_trial.params.items():
                mlflow.log_param(f"best_trial_param_{param_name}", param_value)

            # Log study summary to a StringIO buffer
            summary_buffer = StringIO()
            summary_buffer.write("Optuna Study Summary\n")
            summary_buffer.write(f"Study Name: {study.study_name}\n")
            summary_buffer.write(f"Direction: {study.directions}\n")
            summary_buffer.write(f"Best Value: {study.best_value}\n")
            summary_buffer.write(f"Best Params: {study.best_params}\n")
            summary_buffer.write(f"Number of Trials: {len(study.trials)}\n")
            summary_buffer.write("\nTrial Details:\n")
            for trial in study.trials:
                summary_buffer.write(f"Trial {trial.number}: Value={trial.value}, Params={trial.params}\n")

            mlflow.log_text(summary_buffer.getvalue(), "optuna_study_summary.txt")

            # Log detailed trials DataFrame
            trials_data = [
                {
                    "trial_number": t.number,
                    "value": t.value,
                    "params": t.params,
                    "duration": str(t.duration),
                    "state": str(t.state),
                    "datetime_start": str(t.datetime_start),
                    "datetime_complete": str(t.datetime_complete),
                }
                for t in study.trials
            ]
            trials_df = pd.DataFrame(trials_data)
            self._log_dataframe_as_artifact(trials_df, "optuna_trials_summary.csv")

        except Exception as e:
            print(f"Error logging Optuna study: {e}")

    def log_study_visualizations(self, study: optuna.Study, visualizer: Any) -> None:
        """
        Log visualization plots generated from an Optuna study.

        Args:
            study (optuna.Study): The study object.
            visualizer: Instance of the Visualisation class.
        """
        try:
            visualizer.plot_optuna_study(study, log_to_mlflow=True)
        except Exception as e:
            print(f"Error logging Optuna visualizations: {e}")


    def log_image_to_mlflow(self, image_path: str, artifact_name: Optional[str] = None) -> None:
        """Log an image to MLflow.

        Args:
            image_path (str): Path to the image file.
            artifact_name (Optional[str]): Name for the artifact in MLflow. If None, uses the image file name.
        """
        if not artifact_name:
            artifact_name = os.path.basename(image_path)
        try:
            mlflow.log_artifact(image_path, artifact_path=artifact_name)
        except Exception as e:
            print(f"Error logging artifact {artifact_name}: {e}")

    def log_threshold(self, threshold: float) -> None:
        """
        Log the determined threshold for identifying augmented reliable negatives.

        Args:
            threshold (float): The calculated threshold.
        """
        mlflow.log_param(f"augmen_threshold", threshold)
        print(f"Logged threshold: {threshold}")

    def log_augmen_negatives(self, augmen_real_negatives: pd.DataFrame, fold_index: int) -> None:
        """
        Log augmented reliable negatives as an artifact and count.

        Args:
            augmen_real_negatives (pd.DataFrame): Identified negative samples.
            fold_index (int): Current fold index.
        """
        artifact_name = f"augmen_real_negatives_fold_{fold_index}.csv"
        self._log_dataframe_as_artifact(augmen_real_negatives, artifact_name)

        mlflow.log_metric(f"augmen_real_negatives_count_fold_{fold_index}", len(augmen_real_negatives))
        print(f"Logged augmented negatives count for fold {fold_index}: {len(augmen_real_negatives)}")

    def log_probabilities(self, spy_inf_data: pd.DataFrame, fold_index: int) -> None:
        """
        Log predicted probabilities for spy-infused data.

        Args:
            spy_inf_data (pd.DataFrame): Data with probabilities.
            fold_index (int): Current fold index.
        """
        artifact_name = f"spy_inf_train_probabilities_fold_{fold_index}.csv"
        self._log_dataframe_as_artifact(spy_inf_data[['Prob_0', 'Prob_1', 'Class_by_threshold']], artifact_name)
        print(f"Logged probabilities for fold {fold_index}")

    def log_message(self, message: str) -> None:
        """
        Log a generic message.

        Args:
            message (str): The message to log.
        """
        # print(message)
        try:
            # Log the message to MLflow (this could be customized as needed)
            mlflow.log_text(message, "log_message.txt")
        except Exception as e:
            print(f"Failed to log message to MLflow: {e}")

    def log_evaluation_summary(self, evaluation_results: Dict[str, Any],
                               artifact_name: Optional[str] = "evaluation_summary.json") -> None:
        """
        Log the evaluation summary as a formatted JSON artifact in MLflow.

        Args:
            evaluation_results (Dict[str, Any]): The dictionary of evaluation results.
            artifact_name (Optional[str]): The artifact file name. Defaults to "evaluation_summary.json".
        """
        try:
            evaluation_json = json.dumps(evaluation_results, indent=2)
            mlflow.log_text(evaluation_json, artifact_name)
            self.log_message(f"Logged evaluation summary to artifact {artifact_name}")
        except Exception as e:
            print(f"Error logging evaluation summary: {e}")

    def log_metric_individual(self, metrics: Dict[str, float], prefix: Optional[str] = "") -> None:
        """
        Log individual metrics using mlflow.log_metric for easy viewing in the MLflow UI.

        Args:
            metrics (Dict[str, float]): Dictionary of metric names and values.
            prefix (Optional[str]): Optional prefix to add to metric names.
        """
        for metric_name, value in metrics.items():
            mlflow.log_metric(f"{prefix}{metric_name}", value)
            self.log_message(f"Logged metric {prefix}{metric_name}: {value}")

__init__(config)

Initialize the Logger.

Parameters:

    config (Dict[str, Any], required): Configuration dictionary.

Source code in payn\Logging\logging.py
def __init__(self, config: dict) -> None:
    """
    Initialize the Logger.

    Args:
        config (Dict[str, Any]): Configuration dictionary.
    """
    self.config = config

log_augmen_negatives(augmen_real_negatives, fold_index)

Log augmented reliable negatives as an artifact and count.

Parameters:

    augmen_real_negatives (DataFrame, required): Identified negative samples.
    fold_index (int, required): Current fold index.

Source code in payn\Logging\logging.py
def log_augmen_negatives(self, augmen_real_negatives: pd.DataFrame, fold_index: int) -> None:
    """
    Log augmented reliable negatives as an artifact and count.

    Args:
        augmen_real_negatives (pd.DataFrame): Identified negative samples.
        fold_index (int): Current fold index.
    """
    artifact_name = f"augmen_real_negatives_fold_{fold_index}.csv"
    self._log_dataframe_as_artifact(augmen_real_negatives, artifact_name)

    mlflow.log_metric(f"augmen_real_negatives_count_fold_{fold_index}", len(augmen_real_negatives))
    print(f"Logged augmented negatives count for fold {fold_index}: {len(augmen_real_negatives)}")

log_config_to_mlflow(print_config=True)

Log all configuration parameters to MLflow.

Parameters:

    print_config (bool, default True): If True, prints parameters to stdout.

Source code in payn\Logging\logging.py
def log_config_to_mlflow(self, print_config: bool = True) -> None:
    """
    Log all configuration parameters to MLflow.

    Args:
        print_config (bool): If True, prints parameters to stdout.
    """
    for section, params in self.config.items():
        if print_config:
            print(f"{section}:")
        for key, value in params.items():
            mlflow.log_param(f"{section}.{key}", value)
            if print_config:
                print(f"\t {key}: {value}")

log_evaluation_metrics(eval_result)

Log evaluation metrics of the model to MLflow.

Parameters:

    eval_result (dict, required): Evaluation results from test set.

Source code in payn\Logging\logging.py
def log_evaluation_metrics(self, eval_result: dict) -> None:
    """
    Log evaluation metrics of the model to MLflow.

    Args:
        eval_result (dict): Evaluation results from test set.
    """
    for metric_name, values in eval_result.items():
        mlflow.log_metric(f"test_{metric_name}", values[-1])

log_evaluation_summary(evaluation_results, artifact_name='evaluation_summary.json')

Log the evaluation summary as a formatted JSON artifact in MLflow.

Parameters:

    evaluation_results (Dict[str, Any], required): The dictionary of evaluation results.
    artifact_name (Optional[str], default 'evaluation_summary.json'): The artifact file name.

Source code in payn\Logging\logging.py
def log_evaluation_summary(self, evaluation_results: Dict[str, Any],
                           artifact_name: Optional[str] = "evaluation_summary.json") -> None:
    """
    Log the evaluation summary as a formatted JSON artifact in MLflow.

    Args:
        evaluation_results (Dict[str, Any]): The dictionary of evaluation results.
        artifact_name (Optional[str]): The artifact file name. Defaults to "evaluation_summary.json".
    """
    try:
        evaluation_json = json.dumps(evaluation_results, indent=2)
        mlflow.log_text(evaluation_json, artifact_name)
        self.log_message(f"Logged evaluation summary to artifact {artifact_name}")
    except Exception as e:
        print(f"Error logging evaluation summary: {e}")

log_fold_data(train_data, val_data, test_data, fold_index=None)

Log train, validation, and test splits as artifacts.

Parameters:

    train_data (DataFrame, required): Training dataset.
    val_data (DataFrame, required): Validation dataset.
    test_data (DataFrame, required): Test dataset.
    fold_index (int, default None): The index of the K-Fold split.

Source code in payn\Logging\logging.py
def log_fold_data(self, train_data: pd.DataFrame, val_data: pd.DataFrame, test_data: pd.DataFrame, fold_index: Optional[int] = None)-> None:
    """
    Log train, validation, and test splits as artifacts.

    Args:
        train_data (pd.DataFrame): Training dataset.
        val_data (pd.DataFrame): Validation dataset.
        test_data (pd.DataFrame): Test dataset.
        fold_index (int, optional): The index of the K-Fold split.
    """
    suffix = f"_fold_{fold_index}" if fold_index is not None else ""

    self._log_dataframe_as_artifact(train_data, f"train_data{suffix}.csv")
    self._log_dataframe_as_artifact(val_data, f"val_data{suffix}.csv")
    self._log_dataframe_as_artifact(test_data, f"test_data{suffix}.csv")
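
A sketch of the artifact names produced for a given fold (the toy DataFrame and fold index are illustrative):

import mlflow
import pandas as pd
from payn.Logging import Logger

df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [0, 1, 0, 1]})
train, val, test = df.iloc[:2], df.iloc[2:3], df.iloc[3:]

with mlflow.start_run():
    # Creates train_data_fold_2.csv, val_data_fold_2.csv and test_data_fold_2.csv;
    # with fold_index=None the "_fold_*" suffix is omitted.
    Logger({}).log_fold_data(train, val, test, fold_index=2)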

log_image_to_mlflow(image_path, artifact_name=None)

Log an image to MLflow.

Parameters:

    image_path (str, required): Path to the image file.
    artifact_name (Optional[str], default None): Name for the artifact in MLflow. If None, uses the image file name.

Source code in payn\Logging\logging.py
def log_image_to_mlflow(self, image_path: str, artifact_name: Optional[str] = None) -> None:
    """Log an image to MLflow.

    Args:
        image_path (str): Path to the image file.
        artifact_name (Optional[str]): Name for the artifact in MLflow. If None, uses the image file name.
    """
    if not artifact_name:
        artifact_name = os.path.basename(image_path)
    try:
        mlflow.log_artifact(image_path, artifact_path=artifact_name)
    except Exception as e:
        print(f"Error logging artifact {artifact_name}: {e}")

log_message(message)

Log a generic message.

Parameters:

    message (str, required): The message to log.

Source code in payn\Logging\logging.py
def log_message(self, message: str) -> None:
    """
    Log a generic message.

    Args:
        message (str): The message to log.
    """
    # print(message)
    try:
        # Log the message to MLflow (this could be customized as needed)
        mlflow.log_text(message, "log_message.txt")
    except Exception as e:
        print(f"Failed to log message to MLflow: {e}")

log_metric_individual(metrics, prefix='')

Log individual metrics using mlflow.log_metric for easy viewing in the MLflow UI.

Parameters:

    metrics (Dict[str, float], required): Dictionary of metric names and values.
    prefix (Optional[str], default ''): Optional prefix to add to metric names.

Source code in payn\Logging\logging.py
def log_metric_individual(self, metrics: Dict[str, float], prefix: Optional[str] = "") -> None:
    """
    Log individual metrics using mlflow.log_metric for easy viewing in the MLflow UI.

    Args:
        metrics (Dict[str, float]): Dictionary of metric names and values.
        prefix (Optional[str]): Optional prefix to add to metric names.
    """
    for metric_name, value in metrics.items():
        mlflow.log_metric(f"{prefix}{metric_name}", value)
        self.log_message(f"Logged metric {prefix}{metric_name}: {value}")

log_model(model, artifact_name)

Save and log a CatBoost model artifact.

Parameters:

    model (Union[CatBoostClassifier, CatBoostRegressor], required): The trained model.
    artifact_name (str, required): Filename for the saved model.

Source code in payn\Logging\logging.py
def log_model(self, model: Union[CatBoostClassifier, CatBoostRegressor], artifact_name: str) -> None:
    """
    Save and log a CatBoost model artifact.

    Args:
        model (Union[CatBoostClassifier, CatBoostRegressor]): The trained model.
        artifact_name (str): Filename for the saved model.
    """
    model.save_model(artifact_name)
    mlflow.log_artifact(artifact_name)

log_model_attributes(model)

Log internal attributes (best iteration, feature importance) of a trained model.

Parameters:

    model (Union[CatBoostClassifier, CatBoostRegressor], required): The trained model.

Source code in payn\Logging\logging.py
def log_model_attributes(self, model: Union[CatBoostClassifier, CatBoostRegressor]) -> None:
    """
    Log internal attributes (best iteration, feature importance) of a trained model.

    Args:
        model (Union[CatBoostClassifier, CatBoostRegressor]): The trained model.
    """
    try:
        # Log best iteration and best score (if available)
        if hasattr(model, "best_iteration_"):
            mlflow.log_param("best_iteration", model.best_iteration_)
        if hasattr(model, "best_score_"):
            for dataset, metrics in model.best_score_.items():
                for metric_name, value in metrics.items():
                    mlflow.log_metric(f"training_{dataset}_{metric_name}", value)

        if hasattr(model, "classes_"):
            mlflow.log_param("classes", model.classes_)

        # Log evaluation results
        if hasattr(model, "evals_result_"):
            evals_result = model.evals_result_
            for dataset, metrics in evals_result.items():
                for metric_name, values in metrics.items():
                    mlflow.log_metric(f"{dataset}_{metric_name}_final", values[-1])

        # Log feature importance
        if hasattr(model, "feature_importances_"):
            feature_importances = pd.DataFrame(
                {"Feature_Index": range(len(model.feature_importances_)), "Importance": model.feature_importances_}
            )
            self._log_dataframe_as_artifact(feature_importances, "feature_importances.csv")

        # Log number of features
        if hasattr(model, "n_features_in_"):
            mlflow.log_param("n_features", model.n_features_in_)
        # Log number of trees
        if hasattr(model, "tree_count_"):
            mlflow.log_param("tree_count", model.tree_count_)

    except Exception as e:
        print(f"Error logging model attributes: {e}")

log_model_hyperparameters(model, **kwargs)

Log the hyperparameters of the model to MLflow.

Logs both user-provided hyperparameters and the config hyperparameters of the CatBoost model. User-specified parameters are logged as-is, while non-overridden defaults are prefixed with 'config_'.

Parameters:

    model (CatBoostClassifier, required): CatBoost model instance.
    **kwargs (Any, default {}): Additional keyword arguments for user-specified hyperparameters.

Source code in payn\Logging\logging.py
def log_model_hyperparameters(self, model: Union[CatBoostClassifier, CatBoostRegressor], **kwargs: Any) -> None:
    """
    Log the hyperparameters of the model to MLflow.

    Logs both user-provided hyperparameters and the config hyperparameters of the CatBoost model.
    User-specified parameters are logged as-is, while non-overridden defaults are prefixed with 'config_'.

    Args:
        model (CatBoostClassifier): CatBoost model instance.
        **kwargs (Any): Additional keyword arguments for user-specified hyperparameters.
    """
    # Log user-provided hyperparameters
    valid_params = model.get_params().keys()
    filtered_kwargs = {k: v for k, v in kwargs.items() if k in valid_params}
    mlflow.log_params(filtered_kwargs)

    # Log config hyperparameters (not overridden by user)
    try:
        config_params = model.get_params()
        for key, value in config_params.items():
            if key not in filtered_kwargs:
                mlflow.log_param(f"config_{key}", value)
    except Exception as e:
        print(f"Error logging config hyperparameters: {e}")

    # Log counts for transparency
    mlflow.log_param("user_provided_hyperparameters_count", len(filtered_kwargs))
    mlflow.log_param("config_hyperparameters_count", len(config_params) - len(filtered_kwargs))

log_optuna_study(study)

Log Optuna study details to MLflow, including parameters, metrics, and a summary.

Parameters:

    study (optuna.Study, required): The Optuna study object.

Source code in payn\Logging\logging.py
def log_optuna_study(self, study: optuna.Study) -> None:
    """
    Log Optuna study details to MLflow, including parameters, metrics, and a summary.

    Args:
        study (optuna.Study): The Optuna study object.
    """
    try:
        # Log best parameters and best value
        mlflow.log_params(study.best_params)
        mlflow.log_metric("best_value", study.best_value)

        # Log best trial details
        best_trial = study.best_trial
        mlflow.log_param("best_trial_number", best_trial.number)
        mlflow.log_param("best_trial_duration", str(best_trial.duration))
        for param_name, param_value in best_trial.params.items():
            mlflow.log_param(f"best_trial_param_{param_name}", param_value)

        # Log study summary to a StringIO buffer
        summary_buffer = StringIO()
        summary_buffer.write("Optuna Study Summary\n")
        summary_buffer.write(f"Study Name: {study.study_name}\n")
        summary_buffer.write(f"Direction: {study.directions}\n")
        summary_buffer.write(f"Best Value: {study.best_value}\n")
        summary_buffer.write(f"Best Params: {study.best_params}\n")
        summary_buffer.write(f"Number of Trials: {len(study.trials)}\n")
        summary_buffer.write("\nTrial Details:\n")
        for trial in study.trials:
            summary_buffer.write(f"Trial {trial.number}: Value={trial.value}, Params={trial.params}\n")

        mlflow.log_text(summary_buffer.getvalue(), "optuna_study_summary.txt")

        # Log detailed trials DataFrame
        trials_data = [
            {
                "trial_number": t.number,
                "value": t.value,
                "params": t.params,
                "duration": str(t.duration),
                "state": str(t.state),
                "datetime_start": str(t.datetime_start),
                "datetime_complete": str(t.datetime_complete),
            }
            for t in study.trials
        ]
        trials_df = pd.DataFrame(trials_data)
        self._log_dataframe_as_artifact(trials_df, "optuna_trials_summary.csv")

    except Exception as e:
        print(f"Error logging Optuna study: {e}")

log_probabilities(spy_inf_data, fold_index)

Log predicted probabilities for spy-infused data.

Parameters:

    spy_inf_data (DataFrame, required): Data with probabilities.
    fold_index (int, required): Current fold index.

Source code in payn\Logging\logging.py
def log_probabilities(self, spy_inf_data: pd.DataFrame, fold_index: int) -> None:
    """
    Log predicted probabilities for spy-infused data.

    Args:
        spy_inf_data (pd.DataFrame): Data with probabilities.
        fold_index (int): Current fold index.
    """
    artifact_name = f"spy_inf_train_probabilities_fold_{fold_index}.csv"
    self._log_dataframe_as_artifact(spy_inf_data[['Prob_0', 'Prob_1', 'Class_by_threshold']], artifact_name)
    print(f"Logged probabilities for fold {fold_index}")

log_spy_infiltrated_data(spy_inf_train_data, spies)

Log spy-infiltrated training set as an artifact to MLflow.

Parameters:

    spy_inf_train_data (DataFrame, required): The combined training set (Positives + Unlabeled/Spies).
    spies (DataFrame, required): The subset of Positives used as Spies.

Source code in payn\Logging\logging.py
def log_spy_infiltrated_data(self, spy_inf_train_data: pd.DataFrame, spies: pd.DataFrame)-> None:
    """
    Log spy-infiltrated training set as an artifact to MLflow.

    Args:
        spy_inf_train_data (pd.DataFrame): The combined training set (Positives + Unlabeled/Spies).
        spies (pd.DataFrame): The subset of Positives used as Spies.
    """
    self._log_dataframe_as_artifact(spy_inf_train_data, "spy_inf_train_data.csv")
    self._log_dataframe_as_artifact(spies, "spies.csv")

log_spysplit_data(train_data, unlabeled_data)

Log the initial PU split (Labeled Train vs. Unlabeled) as artifacts.

Parameters:

    train_data (DataFrame, required): Labeled positive training data.
    unlabeled_data (DataFrame, required): Unlabeled data pool.

Source code in payn\Logging\logging.py
def log_spysplit_data(self, train_data: pd.DataFrame, unlabeled_data: pd.DataFrame)-> None:
    """
    Log the initial PU split (Labeled Train vs. Unlabeled) as artifacts.

    Args:
        train_data (pd.DataFrame): Labeled positive training data.
        unlabeled_data (pd.DataFrame): Unlabeled data pool.
    """
    # Log datasets using the utility function
    self._log_dataframe_as_artifact(train_data, "train_data_spysplit.csv")
    self._log_dataframe_as_artifact(unlabeled_data, "unlabeled_data_spysplit.csv")

log_study_visualizations(study, visualizer)

Log visualization plots generated from an Optuna study.

Parameters:

    study (optuna.Study, required): The study object.
    visualizer (Any, required): Instance of the Visualisation class.

Source code in payn\Logging\logging.py
def log_study_visualizations(self, study: optuna.Study, visualizer: Any) -> None:
    """
    Log visualization plots generated from an Optuna study.

    Args:
        study (optuna.Study): The study object.
        visualizer: Instance of the Visualisation class.
    """
    try:
        visualizer.plot_optuna_study(study, log_to_mlflow=True)
    except Exception as e:
        print(f"Error logging Optuna visualizations: {e}")

log_threshold(threshold)

Log the determined threshold for identifying augmented reliable negatives.

Parameters:

    threshold (float, required): The calculated threshold.

Source code in payn\Logging\logging.py
def log_threshold(self, threshold: float) -> None:
    """
    Log the determined threshold for identifying augmented reliable negatives.

    Args:
        threshold (float): The calculated threshold.
    """
    mlflow.log_param(f"augmen_threshold", threshold)
    print(f"Logged threshold: {threshold}")