Source code for aiqclib.common.config.yaml_schema

"""
Module providing YAML-based JSON schemas used to validate dataset, training,
and classification configuration files. Each function returns a YAML string
describing the structure and constraints for a specific configuration schema.
"""


# JSON schema (serialised as YAML) for dataset-preparation configuration files.
# The literal starts at column 0 because YAML block indentation is significant.
_DATA_SET_CONFIG_SCHEMA = """
---
type: object
properties:
  path_info_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        common:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          required:
            - base_path
          additionalProperties: false
        input:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          required:
            - base_path
            - step_folder_name
          additionalProperties: false
        select:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        locate:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        split:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        normalize:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
      required:
        - name
        - common
        - input
      additionalProperties: false
  target_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        variables:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              flag:
                type: string
              pos_flag_values:
                type: array
              neg_flag_values:
                type: array
            required:
              - name
              - flag
              - pos_flag_values
              - neg_flag_values
            additionalProperties: false
      required:
        - name
        - variables
      additionalProperties: false
  summary_stats_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        stats:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              col_names:
                type: array
                items:
                  type: string
            required:
              - name
              - col_names
      required:
        - name
        - stats
      additionalProperties: false
  feature_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        features:
          type: array
          items:
            type: string
      required:
        - name
        - features
      additionalProperties: false
  feature_param_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        params:
          type: array
          items:
            type: object
            properties:
              feature:
                type: string
              col_names:
                type: array
                items:
                  type: string
              stats_set:
                type: object
                properties:
                  name:
                    type: string
                  type:
                    type: string
              convert:
                type: string
              flank_up:
                type: integer
              flank_down:
                type: integer
              summary_stats_names:
                type: array
                items:
                  type: string
              stats:
                type: object
            required:
              - feature
              - col_names
            additionalProperties: false
      required:
        - name
        - params
      additionalProperties: false
  feature_stats_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        min_max:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              stats:
                type: object
            required:
              - name
              - stats
        auto_min_max:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              stats:
                type: object
            required:
              - name
              - stats
        standard:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              stats:
                type: object
            required:
              - name
              - stats
      required:
        - name
      additionalProperties: false
  step_class_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        steps:
          type: object
          properties:
            input:
              type: string
            summary:
              type: string
            select:
              type: string
            locate:
              type: string
            extract:
              type: string
            split:
              type: string
          required:
            - input
            - summary
            - select
            - locate
            - extract
            - split
          additionalProperties: false
      required:
        - name
        - steps
      additionalProperties: false
  step_param_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        type:
          type: string
        steps:
          type: object
          properties:
            input:
              type: object
              properties:
                sub_steps:
                  type: object
                  properties:
                    rename_columns:
                      type: boolean
                    filter_rows:
                      type: boolean
                    validate_columns:
                      type: boolean
                    create_columns:
                      type: boolean
                  required:
                    - rename_columns
                    - filter_rows
                  additionalProperties: false
                rename_dict:
                  type: object
                filter_method_dict:
                  type: object
                  properties:
                    remove_years:
                      type: array
                    keep_years:
                      type: array
                  additionalProperties: false
                create_column_dict:
                  type: object
                  properties:
                    key_columns:
                      type: array
                    sort_columns:
                      type: array
                    columns:
                      type: array
                  additionalProperties: false
              required:
                - sub_steps
              additionalProperties: false
            summary:
              type: object
            select:
              type: object
            locate:
              type: object
            extract:
              type: object
              properties:
                drop_key_columns:
                  type: boolean
            split:
              type: object
            normalize:
              type: object
              properties:
                file_name:
                  type: string
              additionalProperties: false
          required:
            - input
            - summary
            - select
            - locate
            - extract
            - split
          additionalProperties: false
      required:
        - name
        - steps
      additionalProperties: false
  data_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        dataset_folder_name:
          type: string
        input_file_name:
          type: string
        path_info:
          type: string
        target_set:
          type: string
        summary_stats_set:
          type: string
        feature_set:
          type: string
        feature_param_set:
          type: string
        feature_stats_set:
          type: string
        step_class_set:
          type: string
        step_param_set:
          type: string
      required:
        - name
        - dataset_folder_name
        - input_file_name
        - path_info
        - target_set
        - summary_stats_set
        - feature_set
        - feature_param_set
        - feature_stats_set
        - step_class_set
        - step_param_set
      additionalProperties: false
additionalProperties: false
required:
  - path_info_sets
  - target_sets
  - summary_stats_sets
  - feature_sets
  - feature_param_sets
  - feature_stats_sets
  - step_class_sets
  - step_param_sets
  - data_sets
"""


def get_data_set_config_schema() -> str:
    """
    Return the JSON schema, written as YAML, for dataset configuration files.

    The top-level object must contain ``path_info_sets``, ``target_sets``,
    ``summary_stats_sets``, ``feature_sets``, ``feature_param_sets``,
    ``feature_stats_sets``, ``step_class_sets``, ``step_param_sets`` and
    ``data_sets``; no other top-level keys are accepted. Every section is an
    array of named objects. The step sections require entries for ``input``,
    ``summary``, ``select``, ``locate``, ``extract`` and ``split``.

    The text is returned unparsed; loading it as YAML is left to the caller.

    :return: The schema document as a YAML string.
    :rtype: str
    """
    return _DATA_SET_CONFIG_SCHEMA
# JSON schema (serialised as YAML) for training configuration files.
# The literal starts at column 0 because YAML block indentation is significant.
_TRAINING_CONFIG_SCHEMA = """
---
type: object
properties:
  path_info_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        common:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          required:
            - base_path
          additionalProperties: false
        input:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          required:
            - step_folder_name
          additionalProperties: false
        model:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        validate:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        build:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
      required:
        - name
        - common
        - input
      additionalProperties: false
  target_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        variables:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              flag:
                type: string
              pos_flag_values:
                type: array
              neg_flag_values:
                type: array
            required:
              - name
              - flag
              - pos_flag_values
              - neg_flag_values
            additionalProperties: false
      required:
        - name
        - variables
      additionalProperties: false
  step_class_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        steps:
          type: object
          properties:
            input:
              type: string
            validate:
              type: string
            model:
              type: string
            build:
              type: string
          required:
            - input
            - validate
            - model
            - build
          additionalProperties: false
      required:
        - name
        - steps
      additionalProperties: false
  step_param_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        type:
          type: string
        steps:
          type: object
          properties:
            input:
              type: object
            validate:
              type: object
            model:
              type: object
            build:
              type: object
          required:
            - input
            - validate
            - model
            - build
          additionalProperties: false
      required:
        - name
        - steps
      additionalProperties: false
  training_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        dataset_folder_name:
          type: string
        path_info:
          type: string
        target_set:
          type: string
        step_class_set:
          type: string
        step_param_set:
          type: string
      required:
        - name
        - dataset_folder_name
        - path_info
        - target_set
        - step_class_set
        - step_param_set
      additionalProperties: false
additionalProperties: false
required:
  - path_info_sets
  - target_sets
  - step_class_sets
  - step_param_sets
  - training_sets
"""


def get_training_config_schema() -> str:
    """
    Return the JSON schema, written as YAML, for training configuration files.

    The top-level object must contain ``path_info_sets``, ``target_sets``,
    ``step_class_sets``, ``step_param_sets`` and ``training_sets``; no other
    top-level keys are accepted. The step sections require entries for
    ``input``, ``validate``, ``model`` and ``build``. In this schema the
    ``input`` path entry requires only ``step_folder_name``.

    The text is returned unparsed; loading it as YAML is left to the caller.

    :return: The schema document as a YAML string.
    :rtype: str
    """
    return _TRAINING_CONFIG_SCHEMA
# JSON schema (serialised as YAML) for classification configuration files.
# The literal starts at column 0 because YAML block indentation is significant.
_CLASSIFICATION_CONFIG_SCHEMA = """
---
type: object
properties:
  path_info_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        common:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          required:
            - base_path
          additionalProperties: false
        input:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          required:
            - base_path
            - step_folder_name
          additionalProperties: false
        select:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        locate:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        model:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        classify:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        concat:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
        normalize:
          type: object
          properties:
            base_path:
              type: string
            step_folder_name:
              type: string
          additionalProperties: false
      required:
        - name
        - common
        - input
      additionalProperties: false
  target_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        variables:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              flag:
                type: string
              pos_flag_values:
                type: array
              neg_flag_values:
                type: array
            required:
              - name
              - flag
              - pos_flag_values
              - neg_flag_values
            additionalProperties: false
      required:
        - name
        - variables
      additionalProperties: false
  summary_stats_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        stats:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              col_names:
                type: array
                items:
                  type: string
            required:
              - name
              - col_names
      required:
        - name
        - stats
      additionalProperties: false
  feature_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        features:
          type: array
          items:
            type: string
      required:
        - name
        - features
      additionalProperties: false
  feature_param_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        params:
          type: array
          items:
            type: object
            properties:
              feature:
                type: string
              col_names:
                type: array
                items:
                  type: string
              stats_set:
                type: object
                properties:
                  name:
                    type: string
                  type:
                    type: string
              convert:
                type: string
              flank_up:
                type: integer
              flank_down:
                type: integer
              summary_stats_names:
                type: array
                items:
                  type: string
              stats:
                type: object
            required:
              - feature
              - col_names
            additionalProperties: false
      required:
        - name
        - params
      additionalProperties: false
  feature_stats_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        min_max:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              stats:
                type: object
            required:
              - name
              - stats
        auto_min_max:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              stats:
                type: object
            required:
              - name
              - stats
        standard:
          type: array
          items:
            type: object
            properties:
              name:
                type: string
              stats:
                type: object
            required:
              - name
              - stats
      required:
        - name
      additionalProperties: false
  step_class_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        steps:
          type: object
          properties:
            input:
              type: string
            summary:
              type: string
            select:
              type: string
            locate:
              type: string
            extract:
              type: string
            model:
              type: string
            classify:
              type: string
            concat:
              type: string
          required:
            - input
            - summary
            - select
            - locate
            - extract
            - model
            - classify
            - concat
          additionalProperties: false
      required:
        - name
        - steps
      additionalProperties: false
  step_param_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        type:
          type: string
        steps:
          type: object
          properties:
            input:
              type: object
              properties:
                sub_steps:
                  type: object
                  properties:
                    rename_columns:
                      type: boolean
                    filter_rows:
                      type: boolean
                    validate_columns:
                      type: boolean
                    create_columns:
                      type: boolean
                  required:
                    - rename_columns
                    - filter_rows
                  additionalProperties: false
                rename_dict:
                  type: object
                filter_method_dict:
                  type: object
                  properties:
                    remove_years:
                      type: array
                    keep_years:
                      type: array
                  additionalProperties: false
                create_column_dict:
                  type: object
                  properties:
                    key_columns:
                      type: array
                    sort_columns:
                      type: array
                    columns:
                      type: array
                  additionalProperties: false
              required:
                - sub_steps
              additionalProperties: false
            summary:
              type: object
            select:
              type: object
            locate:
              type: object
            extract:
              type: object
            model:
              type: object
            classify:
              type: object
            concat:
              type: object
            normalize:
              type: object
              properties:
                file_name:
                  type: string
              additionalProperties: false
          required:
            - input
            - summary
            - select
            - locate
            - extract
            - model
            - classify
            - concat
          additionalProperties: false
      required:
        - name
        - steps
      additionalProperties: false
  classification_sets:
    type: array
    items:
      type: object
      properties:
        name:
          type: string
        dataset_folder_name:
          type: string
        input_file_name:
          type: string
        path_info:
          type: string
        target_set:
          type: string
        summary_stats_set:
          type: string
        feature_set:
          type: string
        feature_param_set:
          type: string
        feature_stats_set:
          type: string
        step_class_set:
          type: string
        step_param_set:
          type: string
      required:
        - name
        - dataset_folder_name
        - input_file_name
        - path_info
        - target_set
        - summary_stats_set
        - feature_set
        - feature_param_set
        - feature_stats_set
        - step_class_set
        - step_param_set
      additionalProperties: false
additionalProperties: false
required:
  - path_info_sets
  - target_sets
  - summary_stats_sets
  - feature_sets
  - feature_param_sets
  - feature_stats_sets
  - step_class_sets
  - step_param_sets
  - classification_sets
"""


def get_classification_config_schema() -> str:
    """
    Return the JSON schema, written as YAML, for classification configuration files.

    The top-level object must contain ``path_info_sets``, ``target_sets``,
    ``summary_stats_sets``, ``feature_sets``, ``feature_param_sets``,
    ``feature_stats_sets``, ``step_class_sets``, ``step_param_sets`` and
    ``classification_sets``; no other top-level keys are accepted. The step
    sections require entries for ``input``, ``summary``, ``select``,
    ``locate``, ``extract``, ``model``, ``classify`` and ``concat``.

    The text is returned unparsed; loading it as YAML is left to the caller.

    :return: The schema document as a YAML string.
    :rtype: str
    """
    return _CLASSIFICATION_CONFIG_SCHEMA