Skip to content

plaid.storage.backend_api

plaid.storage.backend_api

Protocol definition for storage backend modules.

plaid.storage.backend_api.BackendModule

Protocol describing required methods for storage backend plugins.

plaid.storage.backend_api.BackendModule.init_from_disk staticmethod

init_from_disk(path)

Load a dataset dictionary from local storage.

Source code in plaid/storage/backend_api.py
@staticmethod
def init_from_disk(path: Union[str, Path]) -> Mapping[str, Any]:
    """Load a dataset dictionary from local storage.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        path: Location to load from, given as a string or a ``Path``.
            NOTE(review): presumably a folder previously produced by the
            same backend -- confirm against the implementations.

    Returns:
        A string-keyed mapping holding the loaded dataset dictionary.
        NOTE(review): the keys are presumably split names -- confirm.
    """
    ...

plaid.storage.backend_api.BackendModule.download_from_hub staticmethod

download_from_hub(
    repo_id,
    local_dir,
    split_ids=None,
    features=None,
    overwrite=False,
)

Download a dataset dictionary from a remote hub into a local folder.

Source code in plaid/storage/backend_api.py
@staticmethod
def download_from_hub(
    repo_id: str,
    local_dir: Union[str, Path],
    split_ids: Optional[dict[str, Iterable[int]]] = None,
    features: Optional[list[str]] = None,  # noqa: ARG001
    overwrite: bool = False,
) -> str:
    """Download a dataset dictionary from a remote hub into a local folder.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        repo_id: Identifier of the remote hub repository.
        local_dir: Local folder that receives the download, given as a
            string or a ``Path``.
        split_ids: Optional string-keyed dict of integer iterables;
            defaults to ``None``. NOTE(review): presumably maps a split
            name to the sample ids to fetch -- confirm.
        features: Optional list of feature names; defaults to ``None``.
            NOTE(review): presumably restricts what is downloaded; the
            ``noqa: ARG001`` marker suggests some backends may ignore it
            -- confirm.
        overwrite: Boolean flag, ``False`` by default.
            NOTE(review): presumably controls whether an existing
            ``local_dir`` may be replaced -- confirm.

    Returns:
        A string. NOTE(review): presumably the path of the downloaded
        data -- confirm against the implementations.
    """
    ...

plaid.storage.backend_api.BackendModule.init_datasetdict_streaming_from_hub staticmethod

init_datasetdict_streaming_from_hub(
    repo_id, split_ids=None, features=None
)

Initialize a streaming dataset dictionary from a remote hub.

Source code in plaid/storage/backend_api.py
@staticmethod
def init_datasetdict_streaming_from_hub(
    repo_id: str,
    split_ids: Optional[dict[str, Iterable[int]]] = None,
    features: Optional[list[str]] = None,  # noqa: ARG001
) -> dict[str, IterableDataset]:
    """Initialize a streaming dataset dictionary from a remote hub.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        repo_id: Identifier of the remote hub repository.
        split_ids: Optional string-keyed dict of integer iterables;
            defaults to ``None``. NOTE(review): presumably maps a split
            name to the sample ids to stream -- confirm.
        features: Optional list of feature names; defaults to ``None``.
            NOTE(review): presumably restricts what is streamed; the
            ``noqa: ARG001`` marker suggests some backends may ignore it
            -- confirm.

    Returns:
        A string-keyed dict of ``IterableDataset`` objects.
        NOTE(review): the keys are presumably split names -- confirm.
    """
    ...

plaid.storage.backend_api.BackendModule.generate_to_disk staticmethod

generate_to_disk(
    output_folder,
    generators,
    variable_schema=None,
    gen_kwargs=None,
    num_proc=1,
    verbose=False,
)

Generate and save a dataset dictionary to local storage.

Source code in plaid/storage/backend_api.py
@staticmethod
def generate_to_disk(
    output_folder: Union[str, Path],
    generators: dict[str, Callable[..., Generator["Sample", None, None]]],
    variable_schema: Optional[dict[str, dict]] = None,  # noqa: ARG001
    gen_kwargs: Optional[dict[str, dict[str, Any]]] = None,
    num_proc: int = 1,
    verbose: bool = False,
) -> None:
    """Generate and save a dataset dictionary to local storage.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        output_folder: Destination folder, given as a string or a ``Path``.
        generators: String-keyed dict of callables, each returning a
            generator that yields ``Sample`` objects.
            NOTE(review): the keys are presumably split names -- confirm.
        variable_schema: Optional string-keyed dict of dicts; defaults to
            ``None``. NOTE(review): presumably describes each variable;
            the ``noqa: ARG001`` marker suggests some backends may ignore
            it -- confirm.
        gen_kwargs: Optional string-keyed dict of keyword-argument dicts;
            defaults to ``None``. NOTE(review): presumably keyed like
            ``generators`` and forwarded to the matching callable --
            confirm.
        num_proc: Integer, ``1`` by default.
            NOTE(review): presumably the number of worker processes --
            confirm.
        verbose: Boolean flag, ``False`` by default.
            NOTE(review): presumably enables progress output -- confirm.

    Returns:
        None.
    """
    ...

plaid.storage.backend_api.BackendModule.push_local_to_hub staticmethod

push_local_to_hub(repo_id, local_dir, num_workers=1)

Push a local dataset dictionary to a remote hub repository.

Source code in plaid/storage/backend_api.py
@staticmethod
def push_local_to_hub(
    repo_id: str, local_dir: Union[str, Path], num_workers: int = 1
) -> None:
    """Push a local dataset dictionary to a remote hub repository.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        repo_id: Identifier of the remote hub repository to push to.
        local_dir: Local folder holding the dataset dictionary, given as a
            string or a ``Path``.
        num_workers: Integer, ``1`` by default.
            NOTE(review): presumably the number of parallel upload
            workers -- confirm.

    Returns:
        None.
    """
    ...

plaid.storage.backend_api.BackendModule.configure_dataset_card staticmethod

configure_dataset_card(
    repo_id,
    infos,
    local_dir=None,
    viewer=False,
    pretty_name=None,
    dataset_long_description=None,
    illustration_urls=None,
    arxiv_paper_urls=None,
)

Configure metadata for a dataset card associated with a repository.

Source code in plaid/storage/backend_api.py
@staticmethod
def configure_dataset_card(
    repo_id: str,
    infos: "Infos",
    local_dir: Optional[Union[str, Path]] = None,
    viewer: bool = False,
    pretty_name: Optional[str] = None,
    dataset_long_description: Optional[str] = None,
    illustration_urls: Optional[list[str]] = None,
    arxiv_paper_urls: Optional[list[str]] = None,
) -> None:  # pragma: no cover
    """Configure metadata for a dataset card associated with a repository.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation. The stub is excluded
    from coverage measurement via ``pragma: no cover``.

    Args:
        repo_id: Identifier of the repository the card belongs to.
        infos: ``Infos`` object. NOTE(review): presumably the dataset
            metadata rendered into the card -- confirm.
        local_dir: Optional local folder, given as a string or a ``Path``;
            defaults to ``None``. NOTE(review): its role when omitted is
            not visible here -- confirm.
        viewer: Boolean flag, ``False`` by default.
            NOTE(review): presumably toggles the hub's dataset viewer --
            confirm.
        pretty_name: Optional display name; defaults to ``None``.
        dataset_long_description: Optional long description text;
            defaults to ``None``.
        illustration_urls: Optional list of URL strings for illustrations;
            defaults to ``None``.
        arxiv_paper_urls: Optional list of URL strings for arXiv papers;
            defaults to ``None``.

    Returns:
        None.
    """
    ...

plaid.storage.backend_api.BackendModule.to_var_sample_dict staticmethod

to_var_sample_dict(
    dataset, idx, features=None, indexers=None
)

Convert a backend sample to PLAID variable-sample dictionary representation.

Source code in plaid/storage/backend_api.py
@staticmethod
def to_var_sample_dict(
    dataset: "Dataset",
    idx: int,
    features: Optional[list[str]] = None,
    indexers: Optional[dict[str, Any]] = None,
) -> dict[str, Optional[np.ndarray]]:
    """Convert a backend sample to PLAID variable-sample dictionary representation.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        dataset: ``Dataset`` object to read the sample from.
        idx: Integer index. NOTE(review): presumably the position of the
            sample within ``dataset`` -- confirm.
        features: Optional list of feature names; defaults to ``None``.
            NOTE(review): presumably restricts which variables are
            returned -- confirm.
        indexers: Optional string-keyed dict; defaults to ``None``.
            NOTE(review): presumably per-variable sub-selection; the
            value types are unspecified here -- confirm.

    Returns:
        A string-keyed dict whose values are NumPy arrays or ``None``.
        NOTE(review): keys are presumably variable names -- confirm.
    """
    ...

plaid.storage.backend_api.BackendModule.sample_to_var_sample_dict staticmethod

sample_to_var_sample_dict(sample)

Convert a backend-native sample object to a variable-sample dictionary.

Source code in plaid/storage/backend_api.py
@staticmethod
def sample_to_var_sample_dict(
    sample: dict[str, Any],
) -> dict[str, Any]:
    """Convert a backend-native sample object to a variable-sample dictionary.

    This is a protocol stub: the body is intentionally empty and each
    storage backend provides its own implementation.

    Args:
        sample: Backend-native sample, typed as a string-keyed dict.
            NOTE(review): the expected keys are backend-specific and not
            visible here -- confirm.

    Returns:
        A string-keyed dict holding the variable-sample representation.
        NOTE(review): keys are presumably variable names -- confirm.
    """
    ...