"""
Standardized Feature Extraction and Scaling Module.
This module defines the `FeatureBase` abstract base class (ABC), which provides a
standardized framework for feature engineering tasks using the Polars library.
It ensures that subclasses implement a consistent pipeline for feature
extraction and multi-stage scaling.
"""
from abc import ABC, abstractmethod
from typing import Dict, Optional
import polars as pl
class FeatureBase(ABC):
    """
    Abstract base class for extracting and scaling features.

    Child classes must implement every abstract method to define their
    specific logic for feature generation and normalization. The base class
    itself only acts as a container for the data and metadata needed during
    the transformation lifecycle; it performs no computation of its own.

    :ivar target_name: Name of the target variable.
    :ivar feature_info: Metadata or configuration for features.
    :ivar selected_profiles: Polars DataFrame of pre-selected profiles.
    :ivar filtered_input: Polars DataFrame of pre-filtered input data.
    :ivar selected_rows: Mapping of identifiers to specific Polars DataFrames.
    :ivar summary_stats: Polars DataFrame containing summary statistics.
    :ivar features: Polars DataFrame containing the processed features.
        Always ``None`` right after construction.
    """

    def __init__(
        self,
        target_name: Optional[str] = None,
        feature_info: Optional[Dict] = None,
        selected_profiles: Optional[pl.DataFrame] = None,
        filtered_input: Optional[pl.DataFrame] = None,
        selected_rows: Optional[Dict[str, pl.DataFrame]] = None,
        summary_stats: Optional[pl.DataFrame] = None,
    ) -> None:
        """
        Initialize the feature-extraction base class with optional data and metadata.

        Every argument is stored unchanged on the instance under the same
        name; nothing is validated or copied.

        :param target_name: Name of the target variable to use when extracting features.
        :type target_name: Optional[str]
        :param feature_info: A dictionary containing metadata or configuration about features.
        :type feature_info: Optional[Dict]
        :param selected_profiles: A Polars DataFrame containing pre-selected profiles.
        :type selected_profiles: Optional[pl.DataFrame]
        :param filtered_input: A Polars DataFrame that may already include filters.
        :type filtered_input: Optional[pl.DataFrame]
        :param selected_rows: A dictionary mapping identifiers to Polars DataFrames.
        :type selected_rows: Optional[Dict[str, pl.DataFrame]]
        :param summary_stats: A Polars DataFrame of summary statistics for transformations.
        :type summary_stats: Optional[pl.DataFrame]
        :return: None
        :rtype: None
        """
        self.target_name: Optional[str] = target_name
        self.feature_info: Optional[Dict] = feature_info
        self.selected_profiles: Optional[pl.DataFrame] = selected_profiles
        self.filtered_input: Optional[pl.DataFrame] = filtered_input
        self.selected_rows: Optional[Dict[str, pl.DataFrame]] = selected_rows
        self.summary_stats: Optional[pl.DataFrame] = summary_stats
        # Not a constructor argument: starts empty and is meant to be
        # filled in by the subclass's own processing steps.
        self.features: Optional[pl.DataFrame] = None

    @abstractmethod
    def scale_first(self) -> None:
        """
        Apply the first pass of scaling or normalization to the extracted features.

        Typically used for initial transformations such as standard scaling or
        handling outliers. Implementations should update the ``self.features``
        attribute.

        :return: None
        :rtype: None
        """
        pass  # pragma: no cover

    @abstractmethod
    def scale_second(self) -> None:
        """
        Apply a secondary scaling or refinement step to the features.

        Used for additional adjustments or domain-specific normalizations
        required after the first scaling pass. Implementations should update
        the ``self.features`` attribute.

        :return: None
        :rtype: None
        """
        pass  # pragma: no cover