Skip to content

Configuration

Configuration Management

Centralized configuration management is employed to ensure that experimental conditions are explicitly defined and reproducible. PAYN utilizes a hierarchical configuration file (YAML or JSON) to ensure all experimental conditions—from data paths to hyperparameter search spaces—are explicitly defined and version-controllable.

Loading Configuration (payn.ConfigLoader.ConfigLoader)

The ConfigLoader module serves as the primary interface for state initialization.

  • File Parsing: It handles the ingestion of static configuration files, supporting both YAML and JSON formats with automatic type inference.
  • Type Safety: The loader parses the raw input into structured Python dictionaries, acting as the single source for experimental parameters.
  • Instantiation Factory: Key classes (e.g., Featurisation, Optimisation) implement a from_config class method. This "Config-as-Code" pattern decouples the class logic from the configuration structure, allowing classes to be instantiated programmatically during testing while remaining easily configurable through the global config during production runs.

Handles the loading of YAML or JSON configuration files.

Attributes:

Name Type Description
config_path str

Path to the configuration file.

file_type str

Detected file type ('yaml' or 'json').

Source code in payn\ConfigLoader\configloader.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class ConfigLoader:
    """
    Handles the loading of YAML or JSON configuration files.

    The file is re-read from disk on every call to `load_config`, `get` and
    `get_all`; nothing is cached on the instance.

    Attributes:
        config_path (str): Path to the configuration file.
        file_type (str): Detected file type ('yaml' or 'json').
    """
    def __init__(self, config_path: str):
        """
        Initialize the ConfigLoader class.

        Args:
            config_path (str): Path to the input file.

        Raises:
            ValueError: If the file extension is not supported (.yaml, .yml, .json).
        """

        self.config_path = config_path
        self.file_type = self._infer_file_type()

    def _infer_file_type(self) -> str:
        """
        Infer file type based on the file extension (case-insensitive).

        Returns:
            str: File type ('yaml' or 'json').

        Raises:
            ValueError: If the file extension is unsupported.
        """
        _, ext = os.path.splitext(self.config_path)
        ext = ext.lower()

        if ext in ('.yaml', '.yml'):
            return 'yaml'
        if ext == '.json':
            return 'json'
        raise ValueError(f"Unsupported file extension: {ext}")

    def load_config(self) -> Dict[str, Any]:
        """
        Load configuration from the specified file type.

        Returns:
            Dict[str, Any]: A Dictionary containing the configuration data.
                A file that parses to nothing (an empty YAML document or a
                JSON `null`) yields an empty dictionary.

        Raises:
            FileNotFoundError: If the configuration file cannot be found.
            ValueError: If the file type is unsupported or the file cannot be parsed.
        """
        if self.file_type == 'yaml':
            config = self._load_yaml()
        elif self.file_type == 'json':
            config = self._load_json()
        else:
            raise ValueError(f"Unsupported file type: {self.file_type}")
        # The parsers return None for an empty document; hand callers the
        # mapping the signature promises instead.
        return {} if config is None else config

    def _load_yaml(self) -> Dict[str, Any]:
        """
        Load configuration from a YAML file.

        Returns:
            Dict[str, Any]: The parsed YAML data.

        Raises:
            FileNotFoundError: If the YAML file cannot be found.
            ValueError: If the YAML file contains syntax errors.

        """
        try:
            # Explicit UTF-8 so parsing does not depend on the platform's default encoding.
            with open(self.config_path, 'r', encoding='utf-8') as file:
                return yaml.safe_load(file)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"YAML file not found: {self.config_path}") from e
        except yaml.YAMLError as e:
            raise ValueError(f"Error parsing YAML file: {e}") from e

    def _load_json(self) -> Dict[str, Any]:
        """
        Load raw configuration from a JSON file.

        Returns:
            Dict[str, Any]: The parsed JSON data.

        Raises:
            FileNotFoundError: If the JSON file cannot be found.
            ValueError: If the JSON file contains syntax errors.
        """
        try:
            # Explicit UTF-8 so parsing does not depend on the platform's default encoding.
            with open(self.config_path, 'r', encoding='utf-8') as file:
                return json.load(file)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"JSON file not found: {self.config_path}") from e
        except json.JSONDecodeError as e:
            raise ValueError(f"Error parsing JSON file: {e}") from e

    def get(self, section: str, key: str, default: Any = None) -> Any:
        """
        Retrieve a specific configuration value.

        Args:
            section (str): The top-level section in the configuration (e.g., 'dataset').
            key (str): The specific key within that section.
            default (Any, optional): Value to return if the key is missing. Defaults to None.

        Returns:
            Any: The value associated with the specified section and key, or the
                default when the section is missing, the section is not a
                mapping, or the key is missing.
        """
        config = self.load_config()
        section_data = config.get(section) if isinstance(config, dict) else None
        # A section holding a scalar or null has no keys to look up.
        if not isinstance(section_data, dict):
            return default
        return section_data.get(key, default)

    def get_all(self) -> Dict[str, Any]:
        """
        Retrieve the entire configuration data.

        Returns:
            Dict[str, Any]: A dictionary with all configuration data.
        """
        return self.load_config()

__init__(config_path)

Initialize the ConfigLoader class.

Parameters:

Name Type Description Default
config_path str

Path to the input file.

required

Raises:

Type Description
ValueError

If the file extension is not supported (.yaml, .yml, .json).

Source code in payn\ConfigLoader\configloader.py
15
16
17
18
19
20
21
22
23
24
25
26
27
def __init__(self, config_path: str):
    """
    Set up a loader bound to one configuration file.

    The path is stored as given and the file type is determined immediately
    via `_infer_file_type`, so an unusable path is reported at construction.

    Args:
        config_path (str): Path to the input file.

    Raises:
        ValueError: If the file extension is not supported (.yaml, .yml, .json).
    """
    # The path must be stored first: type inference reads it from the instance.
    self.config_path = config_path
    detected_type = self._infer_file_type()
    self.file_type = detected_type

get(section, key, default=None)

Retrieve a specific configuration value.

Parameters:

Name Type Description Default
section str

The top-level section in the configuration (e.g., 'dataset').

required
key str

The specific key within that section.

required
default Any

Value to return if the key is missing. Defaults to None.

None

Returns:

Name Type Description
Any Any

The value associated with the specified section and key, or the default.

Source code in payn\ConfigLoader\configloader.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
def get(self, section: str, key: str, default: Any = None) -> Any:
    """
    Retrieve a specific configuration value.

    The configuration is loaded through `load_config` on every call.

    Args:
        section (str): The top-level section in the configuration (e.g., 'dataset').
        key (str): The specific key within that section.
        default (Any, optional): Value to return if the key is missing. Defaults to None.

    Returns:
        Any: The value associated with the specified section and key, or the
            default when the section is missing, the section is not a mapping,
            or the key is missing.
    """
    config = self.load_config()
    # An empty file can load as None; treat that like a missing section.
    section_data = config.get(section) if isinstance(config, dict) else None
    # A section holding a scalar or null has no keys to look up.
    if not isinstance(section_data, dict):
        return default
    return section_data.get(key, default)

get_all()

Retrieve the entire configuration data.

Returns:

Type Description
Dict[str, Any]

Dict[str, Any]: A dictionary with all configuration data.

Source code in payn\ConfigLoader\configloader.py
116
117
118
119
120
121
122
123
def get_all(self) -> Dict[str, Any]:
    """
    Return the complete configuration.

    Delegates to `load_config`, so the result is whatever that call produces.

    Returns:
        Dict[str, Any]: A dictionary with all configuration data.
    """
    full_config = self.load_config()
    return full_config

load_config()

Load configuration from the specified file type.

Returns:

Type Description
dict[str, Any]

Dict[str, Any]: A Dictionary containing the configuration data.

Source code in payn\ConfigLoader\configloader.py
49
50
51
52
53
54
55
56
57
58
59
60
61
def load_config(self) -> dict[str, Any]:
    """
    Read the configuration using the loader that matches `file_type`.

    Returns:
        Dict[str, Any]: A Dictionary containing the configuration data.

    Raises:
        ValueError: If `file_type` is neither 'yaml' nor 'json'.
    """
    kind = self.file_type
    if kind == 'yaml':
        return self._load_yaml()
    if kind == 'json':
        return self._load_json()
    # Anything else means the instance is in an unexpected state.
    raise ValueError(f"Unsupported file type: {self.file_type}")

ArgParser for CLI / Slurm (payn.ConfigLoader.ConfigArgParser)

To facilitate high-throughput computing (HTC) and integration with workload managers (e.g., Slurm), the ConfigArgParser module enables dynamic runtime modification of the configuration.

  • Dynamic Argument Generation: The module recursively traverses the loaded configuration dictionary and automatically generates a corresponding command-line interface (CLI) argument for every parameter (e.g., nested keys like spy_model.eval_metric are mapped to flags like --spy_model_eval_metric).
  • Runtime Overrides: This allows specific parameters to be modified for individual jobs within a batch array without altering the source configuration file, ensuring that the base experimental structure remains constant while variables (e.g., hyperparameters) can be altered.
  • Provenance: All CLI overrides are logged explicitly at the start of the run to ensure the exact set of parameters used for a specific job can be reconstructed.

Dynamically generates an argparse CLI based on a hierarchical YAML/JSON configuration.

This class allows every parameter in the config file to be overridden via command-line flags. It flattens nested structures (e.g., section.key becomes --section_key) and handles type inference automatically.

Attributes:

Name Type Description
config_path str

Path to the source configuration file.

config Dict[str, Any]

The loaded configuration dictionary.

parser ArgumentParser

The generated argument parser.

Source code in payn\ConfigLoader\configArgParser.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
class ConfigArgParser:
    """
    Dynamically generates an argparse CLI based on a hierarchical YAML/JSON configuration.

    This class allows every parameter in the config file to be overridden via command-line flags.
    It flattens nested structures (e.g., `section.key` becomes `--section_key`) and handles
    type inference automatically.

    Because flattening with underscores is ambiguous (keys may themselves contain
    underscores), the exact key path behind every generated flag is recorded when
    the flag is created and used to apply overrides at any nesting depth.

    Attributes:
        config_path (str): Path to the source configuration file.
        config (Dict[str, Any]): The loaded configuration dictionary.
        parser (argparse.ArgumentParser): The generated argument parser.
    """
    def __init__(self, config_path: str) -> None:
        """
        Initialize the parser and generate arguments from the config file.

        Args:
            config_path (str): Path to the YAML or JSON config file.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file format is unsupported or invalid.
        """
        self.config_path = config_path
        self.config = self._load_config(config_path)
        # argparse destination -> tuple of config keys leading to the value.
        self._dest_to_path = {}
        self.parser = argparse.ArgumentParser(
            description="Dynamically override PAYN configuration parameters via CLI."
        )

        # Special Case: SLURM/user-Experiment ID shortcut
        self.parser.add_argument(
            "--experiment",
            type=str,
            default=None,
            help="Experiment identifier in the format USER-EXPID (e.g., FBS-FA-056). Overrides general.user and general.experiment_id."
        )

        self._auto_generate_arguments()

    def _load_config(self, path: str) -> Dict[str, Any]:
        """
        Load configuration from a file.

        Args:
            path (str): Path to the file.

        Returns:
            Dict[str, Any]: Loaded configuration data. A file that parses to
                nothing (empty YAML document, JSON `null`) yields an empty dict.

        Raises:
            FileNotFoundError: If the file does not exist.
            ValueError: If the file format is unsupported or invalid, or the
                top level of the file is not a mapping.
        """
        _, ext = os.path.splitext(path)
        ext = ext.lower()
        if ext in ('.yaml', '.yml'):
            parse = yaml.safe_load
        elif ext == '.json':
            parse = json.load
        else:
            raise ValueError(f"Unsupported config format: {ext}")

        try:
            # Explicit UTF-8 so parsing does not depend on the platform's default encoding.
            with open(path, "r", encoding="utf-8") as file:
                config = parse(file)
        except FileNotFoundError as e:
            raise FileNotFoundError(f"Configuration file not found: {path}") from e
        except (yaml.YAMLError, json.JSONDecodeError) as e:
            raise ValueError(f"Error parsing configuration file: {e}") from e

        if config is None:
            return {}
        if not isinstance(config, dict):
            # Argument generation walks key/value pairs; anything else cannot be mapped to flags.
            raise ValueError(
                f"Error parsing configuration file: top level must be a mapping, got {type(config).__name__}"
            )
        return config

    def _auto_generate_arguments(self, prefix: str = "", d: Dict[str, Any] = None, _path: tuple = None):
        """
        Recursively generates CLI arguments from the nested config dictionary.

        Args:
            prefix (str): The current namespace prefix (e.g., "dataset.").
            d (Dict[str, Any], optional): The current dictionary level. Defaults to root config.
            _path (tuple, optional): Config keys leading to `d`. Internal; when
                omitted it is derived from `prefix`.
        """
        if d is None:
            d = self.config
        if _path is None:
            _path = tuple(part for part in prefix.split(".") if part)

        for key, value in d.items():
            arg_name = f"{prefix}{key}"
            key_path = _path + (key,)

            if isinstance(value, dict):
                # Recursive call for nested dictionaries
                self._auto_generate_arguments(prefix=f"{arg_name}.", d=value, _path=key_path)
                continue

            # Flatten key for CLI flag: "dataset.path" -> "--dataset_path"
            arg_flag = "--" + arg_name.replace(".", "_")

            if isinstance(value, bool):
                # A bare flag means True; an explicit value (e.g. "false") is also accepted.
                # '%' is doubled because argparse %-formats help strings.
                action = self.parser.add_argument(
                    arg_flag, type=self._str2bool, nargs='?', const=True,
                    default=None,
                    help=f"Override '{arg_name}' (bool, default: {value})".replace("%", "%%")
                )
            elif isinstance(value, list):
                # Element type is taken from the first item; empty lists fall back to str.
                elem_type = self._infer_arg_type(value[0]) if value else str
                action = self.parser.add_argument(
                    arg_flag, type=elem_type, nargs='+',
                    default=None,
                    help=f"Override '{arg_name}' (list, default: {value})".replace("%", "%%")
                )
            else:
                # Handle standard single-value arguments
                action = self.parser.add_argument(
                    arg_flag, type=self._infer_arg_type(value),
                    default=None,
                    help=f"Override '{arg_name}' (default: {value})".replace("%", "%%")
                )

            # Remember where this flag came from so the override can be applied exactly.
            self._dest_to_path[action.dest] = key_path

    def _infer_arg_type(self, value: Any):
        """Infer the argparse type based on the value in the config."""
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            return self._str2bool
        elif isinstance(value, int):
            return int
        elif isinstance(value, float):
            return float
        else:
            return str

    def _str2bool(self, v: Any):
        """
        Convert CLI string input to a boolean.

        Args:
            v (Any): The input value.

        Returns:
            bool: The interpreted boolean value.

        Raises:
            argparse.ArgumentTypeError: If the value is not a valid boolean string.
        """
        if isinstance(v, bool):
            return v
        text = str(v).lower()
        if text in ('yes', 'true', 't', 'y', '1'):
            return True
        elif text in ('no', 'false', 'f', 'n', '0'):
            return False
        else:
            raise argparse.ArgumentTypeError('Boolean value expected.')

    def parse_args(self, args=None) -> argparse.Namespace:
        """
        Parse the command-line arguments.

        Args:
            args (list, optional): Explicit argument strings. Defaults to None,
                in which case argparse reads the process arguments.

        Returns:
            argparse.Namespace: The parsed arguments.
        """
        return self.parser.parse_args(args)

    def override_config(self, args: argparse.Namespace) -> Dict[str, Any]:
        """
        Apply CLI overrides to the configuration dictionary.

        Only arguments whose value is not None are applied. The passed
        namespace is left unmodified.

        Args:
            args (argparse.Namespace): The parsed CLI arguments.

        Returns:
            Dict[str, Any]: The updated configuration dictionary.

        Raises:
            KeyError: If a CLI argument cannot be mapped back to the config structure.
        """
        # vars() returns the namespace's own __dict__; copy it so popping
        # 'experiment' does not strip the attribute from the caller's object.
        args_dict = dict(vars(args))

        # Handle special case for --experiment
        experiment = args_dict.pop("experiment", None)
        if experiment:
            parts = experiment.split("-", 1)
            if len(parts) == 2:
                user, exp_id = parts
                general = self.config.setdefault("general", {})
                general["user"] = user
                general["experiment_id"] = exp_id
                print(f"[Info] Parsed experiment name: user='{user}', experiment_id='{exp_id}'")
            else:
                print(f"[Warning] '--experiment' format invalid: '{experiment}' (expected format USER-EXPID)")

        for cli_key, cli_value in args_dict.items():
            if cli_value is None:
                continue
            recorded_path = self._dest_to_path.get(cli_key)
            if recorded_path is not None:
                # Flag generated by this parser: use the exact recorded path.
                config_keys = list(recorded_path)
                success = self._set_by_path(recorded_path, cli_value)
            else:
                # Unknown attribute (e.g. hand-built namespace): fall back to name-based matching.
                config_keys = self._split_cli_key(cli_key)
                success = self._set_nested_config_value(config_keys, cli_value)
            if not success:
                dotted = ".".join(str(k) for k in config_keys)
                raise KeyError(f"[Error] CLI override failed: config path '{dotted}' not found. Please check your flag names against the config structure.")

        return self.config

    def _set_by_path(self, path: tuple, value: Any) -> bool:
        """
        Set a value at an exact key path in the configuration.

        Args:
            path (tuple): Keys from the root to the value (e.g., ('dataset', 'path')).
            value (Any): The value to set.

        Returns:
            bool: True if the path exists and was updated, False otherwise.
        """
        cfg = self.config
        for key in path[:-1]:
            if not isinstance(cfg, dict) or key not in cfg:
                return False
            cfg = cfg[key]
        if isinstance(cfg, dict) and path[-1] in cfg:
            cfg[path[-1]] = value
            return True
        return False

    def _set_nested_config_value(self, keys: list, value: Any) -> bool:
        """
        Set a value in the nested configuration dictionary from a name-derived key list.

        Used as a fallback when no recorded path exists for a CLI key.

        Args:
            keys (List[str]): The list of keys representing the path (e.g., ['dataset', 'path']).
            value (Any): The value to set.

        Returns:
            bool: True if successful, False if the path was not found.
        """
        cfg = self.config
        # Traverse to the second-to-last key
        for i, key in enumerate(keys[:-1]):
            if key in cfg and isinstance(cfg[key], dict):
                cfg = cfg[key]
                continue
            # Fallback: Handle underscores in key names (e.g., 'meta_columns')
            # If 'meta_columns' was split into 'meta' and 'columns', try merging them.
            # Only top-level merged keys are considered.
            merged_key = "_".join(keys[:i+2])
            remaining_keys = keys[i+2:]
            if merged_key not in self.config:
                return False
            if remaining_keys:
                cfg = self.config[merged_key]
                keys = [merged_key] + remaining_keys
            else:
                # The merged name is itself the value to replace, so it lives on the root.
                cfg = self.config
                keys = [merged_key]
            break
        # Set the final key's value
        final_key = keys[-1]
        if isinstance(cfg, dict) and final_key in cfg:
            cfg[final_key] = value
            return True
        return False

    def _split_cli_key(self, cli_key: str) -> list:
        """
        Split a flattened CLI key back into a nested list of keys.

        Uses greedy matching against top-level config sections to handle underscores correctly.
        (e.g., 'spy_model_metric' -> ['spy_model', 'metric'], not ['spy', 'model', 'metric']).
        Only the top-level section is separated; the remainder is returned as one key.

        Args:
            cli_key (str): The flattened key (e.g., "spy_model_eval_metric").

        Returns:
            List[str]: The reconstructed path (e.g., ["spy_model", "eval_metric"]).
        """
        # Sort keys by length (descending) to match the longest possible prefix first (Greedy Match)
        for section in sorted(self.config, key=lambda name: len(str(name)), reverse=True):
            prefix = f"{section}_"
            if cli_key.startswith(prefix):
                suffix = cli_key[len(prefix):]
                return [section, suffix]
        # If no prefix matches, assume it's a top-level key
        return [cli_key]

    def save_config(self, output_path: str = None):
        """
        Save the current configuration state to a file.

        The format is JSON when the path ends in '.json' (case-insensitive),
        YAML otherwise. With no `output_path` the original config file is
        overwritten.

        Args:
            output_path (str, optional): Path to save the file. Defaults to original path.
        """
        output_path = output_path or self.config_path
        as_json = str(output_path).lower().endswith(".json")
        with open(output_path, "w", encoding="utf-8") as file:
            if as_json:
                json.dump(self.config, file, indent=2)
            else:
                yaml.dump(self.config, file, sort_keys=False)

__init__(config_path)

Initialize the parser and generate arguments from the config file.

Parameters:

Name Type Description Default
config_path str

Path to the YAML or JSON config file.

required
Source code in payn\ConfigLoader\configArgParser.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def __init__(self, config_path: str) -> None:
    """
    Load the configuration and build the command-line parser for it.

    Besides the flags produced by `_auto_generate_arguments`, the parser
    always carries an `--experiment` shortcut flag.

    Args:
        config_path (str): Path to the YAML or JSON config file.
    """
    self.config_path = config_path
    self.config = self._load_config(config_path)

    cli = argparse.ArgumentParser(
        description="Dynamically override PAYN configuration parameters via CLI."
    )
    # Special Case: SLURM/user-Experiment ID shortcut
    cli.add_argument(
        "--experiment",
        type=str,
        default=None,
        help="Experiment identifier in the format USER-EXPID (e.g., FBS-FA-056). Overrides general.user and general.experiment_id.",
    )
    # Argument generation registers its flags on self.parser, so attach it first.
    self.parser = cli

    self._auto_generate_arguments()

override_config(args)

Apply CLI overrides to the configuration dictionary.

Parameters:

Name Type Description Default
args Namespace

The parsed CLI arguments.

required

Returns:

Type Description
Dict[str, Any]

Dict[str, Any]: The updated configuration dictionary.

Raises:

Type Description
KeyError

If a CLI argument cannot be mapped back to the config structure.

Source code in payn\ConfigLoader\configArgParser.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def override_config(self, args: argparse.Namespace) -> Dict[str, Any]:
    """
    Apply CLI overrides to the configuration dictionary.

    Only arguments whose value is not None are applied. The passed namespace
    is left unmodified.

    Args:
        args (argparse.Namespace): The parsed CLI arguments.

    Returns:
        Dict[str, Any]: The updated configuration dictionary.

    Raises:
        KeyError: If a CLI argument cannot be mapped back to the config structure.
    """
    # vars() returns the namespace's own __dict__; copy it so popping
    # 'experiment' does not strip the attribute from the caller's object.
    args_dict = dict(vars(args))

    # Handle special case for --experiment
    experiment = args_dict.pop("experiment", None)
    if experiment:
        # Split on the first hyphen only, so the experiment id may itself contain hyphens.
        parts = experiment.split("-", 1)
        if len(parts) == 2:
            user, exp_id = parts
            self.config.setdefault("general", {})["user"] = user
            self.config["general"]["experiment_id"] = exp_id
            print(f"[Info] Parsed experiment name: user='{user}', experiment_id='{exp_id}'")
        else:
            print(f"[Warning] '--experiment' format invalid: '{experiment}' (expected format USER-EXPID)")

    for cli_key, cli_value in args_dict.items():
        # None means the flag was not supplied; keep the configured value.
        if cli_value is not None:
            config_keys = self._split_cli_key(cli_key)
            success = self._set_nested_config_value(config_keys, cli_value)
            if not success:
                raise KeyError(f"[Error] CLI override failed: config path '{'.'.join(config_keys)}' not found. Please check your flag names against the config structure.")

    return self.config

parse_args()

Parse the command-line arguments.

Source code in payn\ConfigLoader\configArgParser.py
149
150
151
def parse_args(self, args=None) -> argparse.Namespace:
    """
    Parse the command-line arguments.

    Args:
        args (list, optional): Explicit argument strings. Defaults to None,
            in which case argparse reads the process arguments.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    return self.parser.parse_args(args)

save_config(output_path=None)

Save the current configuration state to a file.

Parameters:

Name Type Description Default
output_path str

Path to save the file. Defaults to original path.

None
Source code in payn\ConfigLoader\configArgParser.py
248
249
250
251
252
253
254
255
256
257
258
259
260
def save_config(self, output_path: str = None):
    """
    Save the current configuration state to a file.

    The format is JSON when the path ends in '.json' (case-insensitive),
    YAML otherwise. With no `output_path` the original config file is
    overwritten.

    Args:
        output_path (str, optional): Path to save the file. Defaults to original path.
    """
    output_path = output_path or self.config_path
    # Compare case-insensitively so 'CONFIG.JSON' is not written as YAML.
    as_json = str(output_path).lower().endswith(".json")
    with open(output_path, "w", encoding="utf-8") as file:
        if as_json:
            json.dump(self.config, file, indent=2)
        else:
            yaml.dump(self.config, file, sort_keys=False)