Source code for plaid.problem_definition

"""Implementation of the `ProblemDefinition` class."""

# -*- coding: utf-8 -*-
#
# This file is subject to the terms and conditions defined in
# file 'LICENSE.txt', which is part of this source code package.
#
#

# %% Imports

import sys

# `typing.Self` only exists from Python 3.11 on; older interpreters fall back
# to a plain TypeVar carrying the same name so annotations keep working.
if sys.version_info >= (3, 11):
    from typing import Self
else:  # pragma: no cover
    from typing import TypeVar

    Self = TypeVar("Self")


import csv
import json
import logging
from pathlib import Path
from typing import Optional, Union

import yaml

from plaid.constants import AUTHORIZED_TASKS
from plaid.types import IndexType
from plaid.types.feature_types import FeatureIdentifier
from plaid.utils.deprecation import deprecated

# %% Globals

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): `logging.basicConfig` configures the *root* logger as a side
# effect of importing this module (it does nothing if the root logger already
# has handlers). Confirm that a library module is meant to do this.
logging.basicConfig(
    format="[%(asctime)s:%(levelname)s:%(filename)s:%(funcName)s(%(lineno)d)]:%(message)s",
    level=logging.INFO,
)

# %% Functions

# %% Classes



[docs]
class ProblemDefinition(object):
    """Gathers all necessary informations to define a learning problem."""

    def __init__(
        self,
        path: Optional[Union[str, Path]] = None,
        directory_path: Optional[Union[str, Path]] = None,
    ) -> None:
        """Initialize an empty :class:`ProblemDefinition <plaid.problem_definition.ProblemDefinition>`.

        Use :meth:`add_inputs <plaid.problem_definition.ProblemDefinition.add_inputs>` or :meth:`add_output_scalars_names <plaid.problem_definition.ProblemDefinition.add_output_scalars_names>` to feed the :class:`ProblemDefinition`

        Args:
            path (Union[str,Path], optional): The path from which to load PLAID problem definition files.
            directory_path (Union[str,Path], optional): Deprecated, use `path` instead.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition

                # 1. Create empty instance of ProblemDefinition
                problem_definition = ProblemDefinition()
                print(problem_definition)
                >>> ProblemDefinition()

                # 2. Load problem definition and create ProblemDefinition instance
                problem_definition = ProblemDefinition("path_to_plaid_prob_def")
                print(problem_definition)
                >>> ProblemDefinition(input_scalars_names=['s_1'], output_scalars_names=['s_2'], input_meshes_names=['mesh'], task='regression')
        """

[docs]
        self._task: str = None  # list[task name]


[docs]
        self.in_features_identifiers: list[FeatureIdentifier] = []


[docs]
        self.out_features_identifiers: list[FeatureIdentifier] = []


[docs]
        self.in_scalars_names: list[str] = []


[docs]
        self.out_scalars_names: list[str] = []


[docs]
        self.in_timeseries_names: list[str] = []


[docs]
        self.out_timeseries_names: list[str] = []


[docs]
        self.in_fields_names: list[str] = []


[docs]
        self.out_fields_names: list[str] = []


[docs]
        self.in_meshes_names: list[str] = []


[docs]
        self.out_meshes_names: list[str] = []


[docs]
        self._split: dict[str, IndexType] = None


        if directory_path is not None:
            if path is not None:
                raise ValueError(
                    "Arguments `path` and `directory_path` cannot be both set. Use only `path` as `directory_path` is deprecated."
                )
            else:
                path = directory_path
                logger.warning(
                    "DeprecationWarning: 'directory_path' is deprecated, use 'path' instead."
                )

        if path is not None:
            path = Path(path)
            self._load_from_dir_(path)

    # -------------------------------------------------------------------------#

[docs]
    def get_task(self) -> str:
        """Get the authorized task. None if not defined.

        Returns:
            str: The authorized task, such as "regression" or "classification".
        """
        return self._task



[docs]
    def set_task(self, task: str) -> None:
        """Set the authorized task.

        Args:
            task (str): The authorized task to be set, such as "regression" or "classification".
        """
        if self._task is not None:
            raise ValueError(f"A task is already in self._task: (`{self._task}`)")
        elif task in AUTHORIZED_TASKS:
            self._task = task
        else:
            raise TypeError(
                f"{task} not among authorized tasks. Maybe you want to try among: {AUTHORIZED_TASKS}"
            )


    # -------------------------------------------------------------------------#


[docs]
    def get_split(
        self, indices_name: Optional[str] = None
    ) -> Union[IndexType, dict[str, IndexType]]:
        """Get the split indices. This function returns the split indices, either for a specific split with the provided `indices_name` or all split indices if `indices_name` is not specified.

        Args:
            indices_name (str, optional): The name of the split for which indices are requested. Defaults to None.

        Raises:
            KeyError: If `indices_name` is specified but not found among split names.

        Returns:
            Union[IndexType,dict[str,IndexType]]: If `indices_name` is provided, it returns
            the indices for that split (IndexType). If `indices_name` is not provided, it
            returns a dictionary mapping split names (str) to their respective indices
            (IndexType).

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                split_indices = problem.get_split()
                print(split_indices)
                >>> {'train': [0, 1, 2, ...], 'test': [100, 101, ...]}

                test_indices = problem.get_split('test')
                print(test_indices)
                >>> [100, 101, ...]
        """
        if indices_name is None:
            return self._split
        else:
            assert indices_name in self._split, (
                indices_name + " not among split indices names"
            )
            return self._split[indices_name]



[docs]
    def set_split(self, split: dict[str, IndexType]) -> None:
        """Set the split indices. This function allows you to set the split indices by providing a dictionary mapping split names (str) to their respective indices (IndexType).

        Args:
            split (dict[str,IndexType]):  A dictionary containing split names and their indices.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                new_split = {'train': [0, 1, 2], 'test': [3, 4]}
                problem.set_split(new_split)
        """
        if self._split is not None:  # pragma: no cover
            logger.warning("split already exists -> data will be replaced")
        self._split = split


    # -------------------------------------------------------------------------#

[docs]
    def get_in_features_identifiers(self) -> list[FeatureIdentifier]:
        """Get the input features identifiers of the problem.

        Returns:
            list[FeatureIdentifier]: A list of input feature identifiers.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                in_features_identifiers = problem.get_in_features_identifiers()
                print(in_features_identifiers)
                >>> ['omega', 'pressure']
        """
        return self.in_features_identifiers



[docs]
    def add_in_features_identifiers(self, inputs: list[FeatureIdentifier]) -> None:
        """Add input features identifiers to the problem.

        Args:
            inputs (list[FeatureIdentifier]): A list of input feature identifiers to add.

        Raises:
            ValueError: If some :code:`inputs` are redondant.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                in_features_identifiers = ['omega', 'pressure']
                problem.add_in_features_identifiers(in_features_identifiers)
        """
        if not (len(set(inputs)) == len(inputs)):
            raise ValueError("Some inputs have same identifiers")
        for input in inputs:
            self.add_in_feature_identifier(input)



[docs]
    def add_in_feature_identifier(self, input: FeatureIdentifier) -> None:
        """Add an input feature identifier or identifier to the problem.

        Args:
            input (FeatureIdentifier):  The identifier or identifier of the input feature to add.

        Raises:
            ValueError: If the specified input feature is already in the list of inputs.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                input_identifier = 'pressure'
                problem.add_in_feature_identifier(input_identifier)
        """
        if input in self.in_features_identifiers:
            raise ValueError(f"{input} is already in self.in_features_identifiers")
        self.in_features_identifiers.append(input)
        self.in_features_identifiers.sort(key=lambda x: x["type"])



[docs]
    def filter_in_features_identifiers(
        self, identifiers: list[FeatureIdentifier]
    ) -> list[FeatureIdentifier]:
        """Filter and get input features features corresponding to a sorted list of identifiers.

        Args:
            identifiers (list[FeatureIdentifier]): A list of identifiers for which to retrieve corresponding input features.

        Returns:
            list[FeatureIdentifier]: A sorted list of input feature identifiers or categories corresponding to the provided identifiers.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                features_identifiers = ['omega', 'pressure', 'temperature']
                input_features = problem.filter_in_features_identifiers(features_identifiers)
                print(input_features)
                >>> ['omega', 'pressure']
        """
        return sorted(set(identifiers).intersection(self.get_in_features_identifiers()))


    # -------------------------------------------------------------------------#

[docs]
    def get_out_features_identifiers(self) -> list[FeatureIdentifier]:
        """Get the output features identifiers of the problem.

        Returns:
            list[FeatureIdentifier]: A list of output feature identifiers.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                outputs_identifiers = problem.get_out_features_identifiers()
                print(outputs_identifiers)
                >>> ['compression_rate', 'in_massflow', 'isentropic_efficiency']
        """
        return self.out_features_identifiers



[docs]
    def add_out_features_identifiers(self, outputs: list[FeatureIdentifier]) -> None:
        """Add output features identifiers to the problem.

        Args:
            outputs (list[FeatureIdentifier]): A list of output feature identifiers to add.

        Raises:
            ValueError: if some :code:`outputs` are redondant.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                out_features_identifiers = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                problem.add_out_features_identifiers(out_features_identifiers)
        """
        if not (len(set(outputs)) == len(outputs)):
            raise ValueError("Some outputs have same identifiers")
        for output in outputs:
            self.add_out_feature_identifier(output)



[docs]
    def add_out_feature_identifier(self, output: FeatureIdentifier) -> None:
        """Add an output feature identifier or identifier to the problem.

        Args:
            output (FeatureIdentifier):  The identifier or identifier of the output feature to add.

        Raises:
            ValueError: If the specified output feature is already in the list of outputs.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                out_features_identifiers = 'pressure'
                problem.add_out_feature_identifier(out_features_identifiers)
        """
        if output in self.out_features_identifiers:
            raise ValueError(f"{output} is already in self.out_features_identifiers")
        self.out_features_identifiers.append(output)
        self.in_features_identifiers.sort(key=lambda x: x["type"])



[docs]
    def filter_out_features_identifiers(
        self, identifiers: list[FeatureIdentifier]
    ) -> list[FeatureIdentifier]:
        """Filter and get output features corresponding to a sorted list of identifiers.

        Args:
            identifiers (list[FeatureIdentifier]): A list of identifiers for which to retrieve corresponding output features.

        Returns:
            list[FeatureIdentifier]: A sorted list of output feature identifiers or categories corresponding to the provided identifiers.

        Example:
            .. code-block:: python

                from plaid.problem_definition import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                features_identifiers = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                output_features = problem.filter_out_features_identifiers(features_identifiers)
                print(output_features)
                >>> ['in_massflow']
        """
        return sorted(
            set(identifiers).intersection(self.get_out_features_identifiers())
        )


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_input_scalars_names(self) -> list[str]:
        """Get the input scalars names of the problem.

        Returns:
            list[str]: A list of input feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_scalars_names = problem.get_input_scalars_names()
                print(input_scalars_names)
                >>> ['omega', 'pressure']
        """
        return self.in_scalars_names


    @deprecated(
        "use `add_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_scalars_names(self, inputs: list[str]) -> None:
        """Add input scalars names to the problem.

        Args:
            inputs (list[str]): A list of input feature names to add.

        Raises:
            ValueError: If some :code:`inputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_scalars_names = ['omega', 'pressure']
                problem.add_input_scalars_names(input_scalars_names)
        """
        if not (len(set(inputs)) == len(inputs)):
            raise ValueError("Some inputs have same names")
        for input in inputs:
            self.add_input_scalar_name(input)


    @deprecated(
        "use `add_in_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_scalar_name(self, input: str) -> None:
        """Add an input scalar name to the problem.

        Args:
            input (str):  The name of the input feature to add.

        Raises:
            ValueError: If the specified input feature is already in the list of inputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_name = 'pressure'
                problem.add_input_scalar_name(input_name)
        """
        if input in self.in_scalars_names:
            raise ValueError(f"{input} is already in self.in_scalars_names")
        self.in_scalars_names.append(input)
        self.in_scalars_names.sort()


    @deprecated(
        "use `filter_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def filter_input_scalars_names(self, names: list[str]) -> list[str]:
        """Filter and get input scalars features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding input features.

        Returns:
            list[str]: A sorted list of input feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                scalars_names = ['omega', 'pressure', 'temperature']
                input_features = problem.filter_input_scalars_names(scalars_names)
                print(input_features)
                >>> ['omega', 'pressure']
        """
        return sorted(set(names).intersection(self.get_input_scalars_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_output_scalars_names(self) -> list[str]:
        """Get the output scalars names of the problem.

        Returns:
            list[str]: A list of output feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                outputs_names = problem.get_output_scalars_names()
                print(outputs_names)
                >>> ['compression_rate', 'in_massflow', 'isentropic_efficiency']
        """
        return self.out_scalars_names


    @deprecated(
        "use `add_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_scalars_names(self, outputs: list[str]) -> None:
        """Add output scalars names to the problem.

        Args:
            outputs (list[str]): A list of output feature names to add.

        Raises:
            ValueError: if some :code:`outputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_scalars_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                problem.add_output_scalars_names(output_scalars_names)
        """
        if not (len(set(outputs)) == len(outputs)):
            raise ValueError("Some outputs have same names")
        for output in outputs:
            self.add_output_scalar_name(output)


    @deprecated(
        "use `add_out_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_scalar_name(self, output: str) -> None:
        """Add an output scalar name to the problem.

        Args:
            output (str):  The name of the output feature to add.

        Raises:
            ValueError: If the specified output feature is already in the list of outputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_scalars_names = 'pressure'
                problem.add_output_scalar_name(output_scalars_names)
        """
        if output in self.out_scalars_names:
            raise ValueError(f"{output} is already in self.out_scalars_names")
        self.out_scalars_names.append(output)
        self.in_scalars_names.sort()



[docs]
    def filter_output_scalars_names(self, names: list[str]) -> list[str]:
        """Filter and get output features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding output features.

        Returns:
            list[str]: A sorted list of output feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                scalars_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                output_features = problem.filter_output_scalars_names(scalars_names)
                print(output_features)
                >>> ['in_massflow']
        """
        return sorted(set(names).intersection(self.get_output_scalars_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_input_fields_names(self) -> list[str]:
        """Get the input fields names of the problem.

        Returns:
            list[str]: A list of input feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_fields_names = problem.get_input_fields_names()
                print(input_fields_names)
                >>> ['omega', 'pressure']
        """
        return self.in_fields_names


    @deprecated(
        "use `add_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_fields_names(self, inputs: list[str]) -> None:
        """Add input fields names to the problem.

        Args:
            inputs (list[str]): A list of input feature names to add.

        Raises:
            ValueError: If some :code:`inputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_fields_names = ['omega', 'pressure']
                problem.add_input_fields_names(input_fields_names)
        """
        if not (len(set(inputs)) == len(inputs)):
            raise ValueError("Some inputs have same names")
        for input in inputs:
            self.add_input_field_name(input)


    @deprecated(
        "use `add_in_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_field_name(self, input: str) -> None:
        """Add an input field name to the problem.

        Args:
            input (str):  The name of the input feature to add.

        Raises:
            ValueError: If the specified input feature is already in the list of inputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_name = 'pressure'
                problem.add_input_field_name(input_name)
        """
        if input in self.in_fields_names:
            raise ValueError(f"{input} is already in self.in_fields_names")
        self.in_fields_names.append(input)
        self.in_fields_names.sort()



[docs]
    def filter_input_fields_names(self, names: list[str]) -> list[str]:
        """Filter and get input fields features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding input features.

        Returns:
            list[str]: A sorted list of input feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_fields_names = ['omega', 'pressure', 'temperature']
                input_features = problem.filter_input_fields_names(input_fields_names)
                print(input_features)
                >>> ['omega', 'pressure']
        """
        return sorted(set(names).intersection(self.get_input_fields_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_output_fields_names(self) -> list[str]:
        """Get the output fields names of the problem.

        Returns:
            list[str]: A list of output feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                outputs_names = problem.get_output_fields_names()
                print(outputs_names)
                >>> ['compression_rate', 'in_massflow', 'isentropic_efficiency']
        """
        return self.out_fields_names


    @deprecated(
        "use `add_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_fields_names(self, outputs: list[str]) -> None:
        """Add output fields names to the problem.

        Args:
            outputs (list[str]): A list of output feature names to add.

        Raises:
            ValueError: if some :code:`outputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_fields_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                problem.add_output_fields_names(output_fields_names)
        """
        if not (len(set(outputs)) == len(outputs)):
            raise ValueError("Some outputs have same names")
        for output in outputs:
            self.add_output_field_name(output)


    @deprecated(
        "use `add_out_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_field_name(self, output: str) -> None:
        """Add an output field name to the problem.

        Args:
            output (str):  The name of the output feature to add.

        Raises:
            ValueError: If the specified output feature is already in the list of outputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_fields_names = 'pressure'
                problem.add_output_field_name(output_fields_names)
        """
        if output in self.out_fields_names:
            raise ValueError(f"{output} is already in self.out_fields_names")
        self.out_fields_names.append(output)
        self.out_fields_names.sort()



[docs]
    def filter_output_fields_names(self, names: list[str]) -> list[str]:
        """Filter and get output features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding output features.

        Returns:
            list[str]: A sorted list of output feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                output_fields_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                output_features = problem.filter_output_fields_names(output_fields_names)
                print(output_features)
                >>> ['in_massflow']
        """
        return sorted(set(names).intersection(self.get_output_fields_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_input_timeseries_names(self) -> list[str]:
        """Get the input timeseries names of the problem.

        Returns:
            list[str]: A list of input feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_timeseries_names = problem.get_input_timeseries_names()
                print(input_timeseries_names)
                >>> ['omega', 'pressure']
        """
        return self.in_timeseries_names


    @deprecated(
        "use `add_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_timeseries_names(self, inputs: list[str]) -> None:
        """Add input timeseries names to the problem.

        Args:
            inputs (list[str]): A list of input feature names to add.

        Raises:
            ValueError: If some :code:`inputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_timeseries_names = ['omega', 'pressure']
                problem.add_input_timeseries_names(input_timeseries_names)
        """
        if not (len(set(inputs)) == len(inputs)):
            raise ValueError("Some inputs have same names")
        for input in inputs:
            self.add_input_timeseries_name(input)


    @deprecated(
        "use `add_in_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_timeseries_name(self, input: str) -> None:
        """Add an input timeseries name to the problem.

        Args:
            input (str):  The name of the input feature to add.

        Raises:
            ValueError: If the specified input feature is already in the list of inputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_name = 'pressure'
                problem.add_input_timeseries_name(input_name)
        """
        if input in self.in_timeseries_names:
            raise ValueError(f"{input} is already in self.in_timeseries_names")
        self.in_timeseries_names.append(input)
        self.in_timeseries_names.sort()



[docs]
    def filter_input_timeseries_names(self, names: list[str]) -> list[str]:
        """Filter and get input timeseries features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding input features.

        Returns:
            list[str]: A sorted list of input feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_timeseries_names = ['omega', 'pressure', 'temperature']
                input_features = problem.filter_input_timeseries_names(input_timeseries_names)
                print(input_features)
                >>> ['omega', 'pressure']
        """
        return sorted(set(names).intersection(self.get_input_timeseries_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_output_timeseries_names(self) -> list[str]:
        """Get the output timeseries names of the problem.

        Returns:
            list[str]: A list of output feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                outputs_names = problem.get_output_timeseries_names()
                print(outputs_names)
                >>> ['compression_rate', 'in_massflow', 'isentropic_efficiency']
        """
        return self.out_timeseries_names


    @deprecated(
        "use `add_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_timeseries_names(self, outputs: list[str]) -> None:
        """Add output timeseries names to the problem.

        Args:
            outputs (list[str]): A list of output feature names to add.

        Raises:
            ValueError: if some :code:`outputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_timeseries_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                problem.add_output_timeseries_names(output_timeseries_names)
        """
        if not (len(set(outputs)) == len(outputs)):
            raise ValueError("Some outputs have same names")
        for output in outputs:
            self.add_output_timeseries_name(output)


    @deprecated(
        "use `add_out_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_timeseries_name(self, output: str) -> None:
        """Add an output timeseries name to the problem.

        Args:
            output (str):  The name of the output feature to add.

        Raises:
            ValueError: If the specified output feature is already in the list of outputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_timeseries_names = 'pressure'
                problem.add_output_timeseries_name(output_timeseries_names)
        """
        if output in self.out_timeseries_names:
            raise ValueError(f"{output} is already in self.out_timeseries_names")
        self.out_timeseries_names.append(output)
        self.in_timeseries_names.sort()



[docs]
    def filter_output_timeseries_names(self, names: list[str]) -> list[str]:
        """Filter and get output features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding output features.

        Returns:
            list[str]: A sorted list of output feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                output_timeseries_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                output_features = problem.filter_output_timeseries_names(output_timeseries_names)
                print(output_features)
                >>> ['in_massflow']
        """
        return sorted(set(names).intersection(self.get_output_timeseries_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_input_meshes_names(self) -> list[str]:
        """Get the input meshes names of the problem.

        Returns:
            list[str]: A list of input feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_meshes_names = problem.get_input_meshes_names()
                print(input_meshes_names)
                >>> ['omega', 'pressure']
        """
        return self.in_meshes_names


    @deprecated(
        "use `add_in_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_meshes_names(self, inputs: list[str]) -> None:
        """Add input meshes names to the problem.

        Args:
            inputs (list[str]): A list of input feature names to add.

        Raises:
            ValueError: If some :code:`inputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_meshes_names = ['omega', 'pressure']
                problem.add_input_meshes_names(input_meshes_names)
        """
        if not (len(set(inputs)) == len(inputs)):
            raise ValueError("Some inputs have same names")
        for input in inputs:
            self.add_input_mesh_name(input)


    @deprecated(
        "use `add_in_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_input_mesh_name(self, input: str) -> None:
        """Add an input mesh name to the problem.

        Args:
            input (str):  The name of the input feature to add.

        Raises:
            ValueError: If the specified input feature is already in the list of inputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                input_name = 'pressure'
                problem.add_input_mesh_name(input_name)
        """
        if input in self.in_meshes_names:
            raise ValueError(f"{input} is already in self.in_meshes_names")
        self.in_meshes_names.append(input)
        self.in_meshes_names.sort()



[docs]
    def filter_input_meshes_names(self, names: list[str]) -> list[str]:
        """Filter and get input meshes features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding input features.

        Returns:
            list[str]: A sorted list of input feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                input_meshes_names = ['omega', 'pressure', 'temperature']
                input_features = problem.filter_input_meshes_names(input_meshes_names)
                print(input_features)
                >>> ['omega', 'pressure']
        """
        return sorted(set(names).intersection(self.get_input_meshes_names()))


    # -------------------------------------------------------------------------#
    @deprecated(
        "use `get_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def get_output_meshes_names(self) -> list[str]:
        """Get the output meshes names of the problem.

        Returns:
            list[str]: A list of output feature names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                outputs_names = problem.get_output_meshes_names()
                print(outputs_names)
                >>> ['compression_rate', 'in_massflow', 'isentropic_efficiency']
        """
        return self.out_meshes_names


    @deprecated(
        "use `add_out_features_identifiers` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_meshes_names(self, outputs: list[str]) -> None:
        """Add output meshes names to the problem.

        Args:
            outputs (list[str]): A list of output feature names to add.

        Raises:
            ValueError: if some :code:`outputs` are redondant.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_meshes_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                problem.add_output_meshes_names(output_meshes_names)
        """
        if not (len(set(outputs)) == len(outputs)):
            raise ValueError("Some outputs have same names")
        for output in outputs:
            self.add_output_mesh_name(output)


    @deprecated(
        "use `add_out_feature_identifier` instead", version="0.1.8", removal="0.2.0"
    )

[docs]
    def add_output_mesh_name(self, output: str) -> None:
        """Add an output mesh name to the problem.

        Args:
            output (str):  The name of the output feature to add.

        Raises:
            ValueError: If the specified output feature is already in the list of outputs.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                output_meshes_names = 'pressure'
                problem.add_output_mesh_name(output_meshes_names)
        """
        if output in self.out_meshes_names:
            raise ValueError(f"{output} is already in self.out_meshes_names")
        self.out_meshes_names.append(output)
        self.in_meshes_names.sort()



[docs]
    def filter_output_meshes_names(self, names: list[str]) -> list[str]:
        """Filter and get output features corresponding to a list of names.

        Args:
            names (list[str]): A list of names for which to retrieve corresponding output features.

        Returns:
            list[str]: A sorted list of output feature names or categories corresponding to the provided names.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                output_meshes_names = ['compression_rate', 'in_massflow', 'isentropic_efficiency']
                output_features = problem.filter_output_meshes_names(output_meshes_names)
                print(output_features)
                >>> ['in_massflow']
        """
        return sorted(set(names).intersection(self.get_output_meshes_names()))


    # -------------------------------------------------------------------------#

[docs]
    def get_all_indices(self) -> list[int]:
        """Get all indices from splits.

        Returns:
            list[int]: list containing all unique indices.
        """
        all_indices = []
        for indices in self.get_split().values():
            all_indices += list(indices)
        return list(set(all_indices))


    # -------------------------------------------------------------------------#

[docs]
    def _save_to_dir_(self, path: Union[str, Path]) -> None:
        """Save problem information, inputs, outputs, and split to the specified directory.

        The task and all input/output names and feature identifiers are written
        to ``problem_infos.yaml``. The split, when it is not ``None``, is written
        to ``split.json``.

        Args:
            path (Union[str,Path]): The directory where the problem information will be saved.
                It is created, along with any missing parent directories, if it does not exist.

        Raises:
            FileExistsError: If ``path`` exists but is not a directory.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                problem._save_to_dir_("/path/to/save_directory")
        """
        path = Path(path)

        # `parents=True` allows saving into a nested directory that does not
        # exist yet; `exist_ok=True` avoids a check-then-create race. A path
        # that exists but is a regular file still raises FileExistsError.
        path.mkdir(parents=True, exist_ok=True)

        data = {
            "task": self._task,
            "input_features": [dict(**d) for d in self.in_features_identifiers],
            "output_features": [dict(**d) for d in self.out_features_identifiers],
            "input_scalars": self.in_scalars_names,  # list[input scalar name]
            "output_scalars": self.out_scalars_names,  # list[output scalar name]
            "input_fields": self.in_fields_names,  # list[input field name]
            "output_fields": self.out_fields_names,  # list[output field name]
            "input_timeseries": self.in_timeseries_names,  # list[input timeseries name]
            "output_timeseries": self.out_timeseries_names,  # list[output timeseries name]
            "input_meshes": self.in_meshes_names,  # list[input mesh name]
            "output_meshes": self.out_meshes_names,  # list[output mesh name]
        }

        pbdef_fname = path / "problem_infos.yaml"
        with pbdef_fname.open("w") as file:
            # sort_keys=False keeps the keys in the order declared above.
            yaml.dump(data, file, default_flow_style=False, sort_keys=False)

        # The split file is only written when a split has been set.
        split_fname = path / "split.json"
        if self._split is not None:
            with split_fname.open("w") as file:
                json.dump(self._split, file)


    @classmethod

[docs]
    def load(cls, path: Union[str, Path]) -> Self:  # pragma: no cover
        """Build a new instance and populate it from a directory.

        A fresh instance is created with ``cls()`` and filled by calling
        ``_load_from_dir_`` on it.

        Args:
            path (Union[str,Path]): The directory from which to load files.

        Returns:
            Self: The newly created instance, populated from ``path``.
        """
        loaded = cls()
        loaded._load_from_dir_(path)
        return loaded



[docs]
    def _load_from_dir_(self, path: Union[str, Path]) -> None:
        """Populate this instance from the files stored in a directory.

        The task, feature identifiers and input/output names are read from
        ``problem_infos.yaml``. The split is read from ``split.csv`` if that
        file exists, otherwise from ``split.json``. When neither split file
        exists a warning is logged and the split is set to an empty dict.

        Args:
            path (Union[str,Path]): The directory from which to load the problem information.

        Raises:
            FileNotFoundError: If ``path`` does not exist, or if it does not contain ``problem_infos.yaml``.
            FileExistsError: If ``path`` exists but is not a directory.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                problem._load_from_dir_("/path/to/load_directory")
        """
        path = Path(path)

        # ---# sanity checks on the target directory
        if not path.exists():
            raise FileNotFoundError(f'Directory "{path}" does not exist. Abort')
        if not path.is_dir():
            raise FileExistsError(f'"{path}" is not a directory. Abort')

        # ---# problem description
        pbdef_fname = path / "problem_infos.yaml"
        if not pbdef_fname.is_file():
            raise FileNotFoundError(
                f"file with path `{pbdef_fname}` does not exist. Abort"
            )
        with pbdef_fname.open("r") as file:
            data = yaml.safe_load(file)

        self._task = data["task"]
        self.in_features_identifiers = [
            FeatureIdentifier(**kwargs) for kwargs in data["input_features"]
        ]
        self.out_features_identifiers = [
            FeatureIdentifier(**kwargs) for kwargs in data["output_features"]
        ]
        # Remaining attributes are plain copies of the matching YAML entries.
        for attribute, key in (
            ("in_scalars_names", "input_scalars"),
            ("out_scalars_names", "output_scalars"),
            ("in_fields_names", "input_fields"),
            ("out_fields_names", "output_fields"),
            ("in_timeseries_names", "input_timeseries"),
            ("out_timeseries_names", "output_timeseries"),
            ("in_meshes_names", "input_meshes"),
            ("out_meshes_names", "output_meshes"),
        ):
            setattr(self, attribute, data[key])

        # ---# split
        # if it was saved with version <=0.1.7 it is a .csv else it is .json
        split_fname_csv = path / "split.csv"
        split_fname_json = path / "split.json"
        split = {}
        if split_fname_csv.is_file():
            # Each CSV row is: split name, then the indices of that split.
            with split_fname_csv.open("r") as file:
                for name, *indices in csv.reader(file, delimiter=","):
                    split[name] = [int(index) for index in indices]
        elif split_fname_json.is_file():
            with split_fname_json.open("r") as file:
                split = json.load(file)
        else:
            logger.warning(
                f"file with path `{split_fname_csv}` or `{split_fname_json}` does not exist. Splits will not be set"
            )
        self._split = split


    # -------------------------------------------------------------------------#
    def __repr__(self) -> str:
        """Return a string representation of the problem.

        Returns:
            str: A string representation of the overview of problem content.

        Example:
            .. code-block:: python

                from plaid import ProblemDefinition
                problem = ProblemDefinition()
                # [...]
                print(problem)
                >>> ProblemDefinition(input_scalars_names=['s_1'], output_scalars_names=['s_2'], input_meshes_names=['mesh'], task='regression', split_names=['train', 'val'])
        """
        str_repr = "ProblemDefinition("

        # ---# features
        if len(self.in_features_identifiers) > 0:
            in_features_identifiers = self.in_features_identifiers
            str_repr += f"{in_features_identifiers=}, "
        if len(self.out_features_identifiers) > 0:
            out_features_identifiers = self.out_features_identifiers
            str_repr += f"{out_features_identifiers=}, "

        # ---# scalars
        if len(self.in_scalars_names) > 0:
            input_scalars_names = self.in_scalars_names
            str_repr += f"{input_scalars_names=}, "
        if len(self.out_scalars_names) > 0:
            output_scalars_names = self.out_scalars_names
            str_repr += f"{output_scalars_names=}, "
        # ---# fields
        if len(self.in_fields_names) > 0:
            input_fields_names = self.in_fields_names
            str_repr += f"{input_fields_names=}, "
        if len(self.out_fields_names) > 0:
            output_fields_names = self.out_fields_names
            str_repr += f"{output_fields_names=}, "
        # ---# timeseries
        if len(self.in_timeseries_names) > 0:
            input_timeseries_names = self.in_timeseries_names
            str_repr += f"{input_timeseries_names=}, "
        if len(self.out_timeseries_names) > 0:
            output_timeseries_names = self.out_timeseries_names
            str_repr += f"{output_timeseries_names=}, "
        # ---# meshes
        if len(self.in_meshes_names) > 0:
            input_meshes_names = self.in_meshes_names
            str_repr += f"{input_meshes_names=}, "
        if len(self.out_meshes_names) > 0:
            output_meshes_names = self.out_meshes_names
            str_repr += f"{output_meshes_names=}, "
        # ---# task
        if self._task is not None:
            task = self._task
            str_repr += f"{task=}, "
        # ---# split
        if self._split is not None:
            split_names = list(self._split.keys())
            str_repr += f"{split_names=}, "

        if str_repr[-2:] == ", ":
            str_repr = str_repr[:-2]
        str_repr += ")"
        return str_repr