Source code for aiqclib.classify.step7_concat_datasets.dataset_all

"""
This module defines the ConcatDataSetAll class, which extends ConcatDatasetsBase
to facilitate the concatenation of model predictions with the original input dataset.
It is designed to integrate into a larger data quality control (DQC) workflow,
specifically within the classification and merging steps.
"""

from typing import Optional, Dict

import polars as pl

from aiqclib.classify.step7_concat_datasets.concat_base import ConcatDatasetsBase
from aiqclib.common.base.config_base import ConfigBase


[docs] class ConcatDataSetAll(ConcatDatasetsBase): """ A subclass of :class:`ConcatDatasetsBase` to concatenate predictions and the input dataset. This class sets its :attr:`expected_class_name` to ``"ConcatDataSetAll"``, ensuring it is recognized in the YAML configuration as a valid processing class. It inherits the concatenation pipeline from :class:`ConcatDatasetsBase`. :ivar expected_class_name: The identifier used for configuration mapping. :vartype expected_class_name: str """ expected_class_name: str = "ConcatDataSetAll" def __init__( self, config: ConfigBase, input_data: Optional[pl.DataFrame] = None, predictions: Optional[Dict[str, pl.DataFrame]] = None, ) -> None: """ Initialize the concatenation workflow for predictions and input data. :param config: A dataset configuration object that manages paths, target definitions, and parameters for data processing. :type config: ConfigBase :param input_data: A Polars DataFrame containing all available data to which predictions will be concatenated, defaults to None. :type input_data: Optional[pl.DataFrame] :param predictions: A dictionary mapping each target (e.g., a classification task) to its respective Polars DataFrame of predictions, defaults to None. :type predictions: Optional[Dict[str, pl.DataFrame]] """ super().__init__( config=config, input_data=input_data, predictions=predictions, )