Source code for record_convertor

"""
Module to define record conversion functionality.

Classes:
    - RecordConvertor: Public class to be used to convert records based upon
                       provided rules.

usage:
>>> converted_record: dict = \
>>>     RecordConvertor(rules: Rules).convert(record: dict)
"""

from copy import copy
from typing import Any, Optional, Union

import jmespath
from jmespath.exceptions import ParseError

from record_convertor.command_processor import ProcessCommand
from record_convertor.dataclass_processor import DataClassProcessor

from .field_convertors import BaseFieldConvertor, DateFieldConvertor
from .package_settings import (
    DataclassInstance,
    DateFormatProtocol,
    EvaluateConditions,
    FieldConvertorProtocol,
    RecConvKeys,
    RulesDict,
    SkipConvKeys,
    SkipRuleDict,
    keys_in_lower_case,
)
from .package_settings.conditions.condition_settings.condition_types import (
    ConditionsDict,
)
from .rules_generator import RulesFromDict, RulesFromYAML  # NOQA: F401


class RecordConvertor:
    """
    Convert an input record (dict) into a new record by applying a rule set.

    The rule set is obtained from ``RULE_CLASS(rule_source=...).rules`` and is
    processed as ``(key, value)`` pairs in iteration order. Each pair is
    classified by its key, checked in this order:

    - key contains ``RecConvKeys.SKIP`` (case-insensitive): skip rule. When its
      condition evaluates to true, conversion stops and a copy of
      ``DEFAULT_VALUE`` is returned.
    - key contains ``"$convert"``: the input record is replaced by the result
      of the field convertor.
    - key contains ``"$format_date"``: the input record is replaced by the
      result of the date formatter.
    - key contains ``"$dataclass"``: conversion stops and the result of the
      dataclass processor is returned.
    - any other key starting with ``"$"``: conversion stops and the result of
      the command class is returned.
    - otherwise the key is an output key: a ``dict`` value is a nested rule
      set, a ``str`` value is a (dotted) path into the input record. Values of
      any other type are ignored.

    Class attributes can be overridden in subclasses to customise behaviour.
    """

    RULE_CLASS: type[Union[RulesFromYAML, RulesFromDict]] = RulesFromYAML
    EVALUATE_CLASS = EvaluateConditions
    # NOTE: a single processor instance is created at class-definition time and
    # is therefore shared by all convertor instances.
    DATA_CLASS_PROCESSOR: DataClassProcessor = DataClassProcessor()
    KEYS_IN_LOWER_CASE: bool = False
    # Value returned by `convert` when a skip rule matches (a copy is returned).
    DEFAULT_VALUE: dict = {}
    DEFAULT_FIELD_CONVERTOR_CLASS: type[FieldConvertorProtocol] = BaseFieldConvertor
    DEFAULT_DATE_FORMAT_CLASS: type[DateFormatProtocol] = DateFieldConvertor
    COMMAND_CLASS: type[ProcessCommand] = ProcessCommand
    # Lazily created shallow copy of the instance, see the `_copy` property.
    _stored_copy: Optional["RecordConvertor"] = None

    def __init__(
        self,
        rule_source: str,
        field_convertor: Optional[type[FieldConvertorProtocol]] = None,
        date_formatter: Optional[type[DateFormatProtocol]] = None,
        data_classes: Optional[list[type[DataclassInstance]]] = None,
        command_class: Optional[type[ProcessCommand]] = None,
    ):
        """
        Args:
            rule_source: source handed to ``RULE_CLASS`` to build the rules.
            field_convertor: class used for ``$convert`` rules; defaults to
                ``DEFAULT_FIELD_CONVERTOR_CLASS``.
            date_formatter: class used for ``$format_date`` rules; defaults to
                ``DEFAULT_DATE_FORMAT_CLASS``.
            data_classes: dataclasses to register with ``DATA_CLASS_PROCESSOR``.
            command_class: class used for command rules; defaults to
                ``COMMAND_CLASS``.
        """
        self._rules = self.RULE_CLASS(rule_source=rule_source).rules

        # set instance of given or default field convertor class
        self._field_convertor: FieldConvertorProtocol = (
            field_convertor or self.DEFAULT_FIELD_CONVERTOR_CLASS
        )()

        # set instance of given or default date format class
        self._date_formatter: DateFormatProtocol = (
            date_formatter or self.DEFAULT_DATE_FORMAT_CLASS
        )()

        # register the given dataclasses (if any) with the dataclass processor
        dataclasses = data_classes or []
        self.DATA_CLASS_PROCESSOR.register_data_classes(dataclasses=dataclasses)

        self._command_class = command_class or self.COMMAND_CLASS

    def convert(self, record: dict) -> dict:
        """
        Primary public method to run the actual conversion of the record.

        Args:
            record (dict): input record

        Returns:
            dict: converted record. A copy of ``DEFAULT_VALUE`` when a skip rule
            matches; the dataclass processor / command result when such a rule
            is encountered (rules after it are not processed).
        """
        output_record: dict = {}
        self._input_record = (
            keys_in_lower_case(record) if self.KEYS_IN_LOWER_CASE else record
        )

        # process all rules (and nested rules)
        for rule in self._rules.items():
            # check if the rule determines that the given record can be skipped.
            # A copy is returned so that callers mutating the result can not
            # alter the shared class level default.
            if self._skip_this_record(rule):
                return copy(self.DEFAULT_VALUE)

            # a skip rule whose condition did not match needs no further
            # processing.
            if self._is_skip_rule(rule):
                continue

            # rules that update the input record ($convert / $format_date) do
            # not produce an output key, so proceed with the next rule.
            if self._change_field_in_input_record_if_required(rule=rule):
                continue

            # a dataclass rule ends the conversion: its result is the result of
            # this convertor.
            if self._is_dataclass_rule(rule=rule):
                _, dataclass_rule = rule
                return self.DATA_CLASS_PROCESSOR.data_from_dataclass(
                    record=self._input_record,
                    rules=dataclass_rule,  # type: ignore
                    record_convertor=self._copy,
                )

            # any remaining `$` key is a command; it also ends the conversion.
            if self._is_command_rule(rule=rule):
                command, command_args = rule
                return self._command_class(
                    record=self._input_record,
                    process_command=command,
                    process_args=command_args,  # type: ignore
                    record_convertor=self._copy,
                ).get_value()

            # All possible command options have been excluded so the rule must
            # be a key definition for the new record.
            output_record_key, output_record_value = rule

            if isinstance(output_record_value, dict):
                # the value is a nested rule set, so it is converted by a
                # record convertor that uses that nested rule set.
                nested_convertor = self.get_record_convertor_copy_with_new_rules(
                    output_record_value
                )
                output_record[output_record_key] = nested_convertor.convert(
                    record=self._input_record
                )
                continue

            if isinstance(output_record_value, str):
                # explicit None check so that falsy results (0, "", ...) are
                # kept in the output record.
                field_value = self._get_field(output_record_value)
                if field_value is not None:
                    output_record[output_record_key] = field_value

        return output_record

    def get_record_convertor_copy_with_new_rules(
        self, new_rules: RulesDict
    ) -> "RecordConvertor":
        """
        Return a copy of the current record convertor instance with new rules.

        NOTE: the copy is created once and cached (see `_copy`). Every call
        returns that same object with its rules replaced by `new_rules`, so a
        result of an earlier call is re-targeted by a later call.
        """
        new_record_convertor = self._copy
        new_record_convertor._rules = new_rules
        return new_record_convertor

    @property
    def _copy(self) -> "RecordConvertor":
        """Shallow copy of this instance, created on first access and cached."""
        # prevent creating a copy every time `_copy` is accessed by storing the
        # first copy in the _stored_copy attribute
        if self._stored_copy is None:
            self._stored_copy = copy(self)
        return self._stored_copy

    def _change_field_in_input_record_if_required(self, rule: tuple) -> bool:
        """
        Update the input record when the given rule is an input record update
        rule ($convert or $format_date).

        Returns True if the rule is an input record update rule and False
        otherwise.
        """
        # check if the rule triggers a field conversion in the input record
        if self._convert_field_rule(rule):
            _, rule_dict = rule
            self._input_record = self._field_convertor.convert_field(
                record=self._input_record, conversion_rule=rule_dict
            )
            return True

        # check if the rule triggers a field date conversion in the input record
        if self._format_date_rule(rule):
            _, rule_dict = rule
            self._input_record = self._date_formatter.format_date_field(
                record=self._input_record, conversion_rule=rule_dict
            )
            return True

        return False

    def _convert_field_rule(self, rule: tuple) -> bool:
        """Return True when the rule key contains "$convert"."""
        rule_key, _ = rule
        return "$convert" in rule_key

    def _format_date_rule(self, rule: tuple) -> bool:
        """Return True when the rule key contains "$format_date"."""
        rule_key, _ = rule
        return "$format_date" in rule_key

    def _is_dataclass_rule(self, rule: tuple) -> bool:
        """Return True when the rule key contains "$dataclass"."""
        rule_key, _ = rule
        return "$dataclass" in rule_key

    def _is_command_rule(self, rule: tuple) -> bool:
        """Return True when the rule key starts with "$"."""
        rule_key, _ = rule
        # startswith (instead of indexing) so an empty key is simply not a
        # command rather than raising an IndexError.
        return rule_key.startswith("$")

    def _is_skip_rule(self, rule: tuple) -> bool:
        """Return True when the lower-cased rule key contains RecConvKeys.SKIP."""
        rule_key, _ = rule
        return RecConvKeys.SKIP in rule_key.lower()

    def _skip_this_record(self, rule: tuple) -> bool:
        """
        Return True when the rule is a skip rule and its condition holds for
        the current input record.

        Raises:
            KeyError: when a skip rule holds no ``SkipConvKeys.CONDITION`` entry.
        """
        if not self._is_skip_rule(rule):
            return False

        _, rule_value = rule
        skip_rule: SkipRuleDict = rule_value
        conditions: Optional[ConditionsDict] = skip_rule[SkipConvKeys.CONDITION]
        fieldname: Optional[str] = skip_rule.get(SkipConvKeys.FIELDNAME)
        field_value = self._get_field(fieldname)
        return self.EVALUATE_CLASS(conditions, field_value).evaluate()

    def _get_field(self, key: Optional[str]) -> Any:
        """
        Return the value found in the input record for the given (dotted) key.

        Returns None when no key is given or when the key can not be parsed as
        a jmespath expression.
        """
        if not key:
            return None

        # Each element of a nested key is surrounded with "". For example
        # key.example-1 becomes "key"."example-1".
        # Needed so that jmespath can handle special characters in the keys.
        quoted_path = ".".join('"' + part + '"' for part in key.split("."))
        try:
            return jmespath.search(quoted_path, self._input_record)
        except ParseError:
            # an unparsable path is treated the same as a missing field
            return None
class RecordConvertorWithRulesDict(RecordConvertor):
    """
    Record convertor that takes its rules from a dict instead of a rule source
    string, by using ``RulesFromDict`` as ``RULE_CLASS``.
    """

    RULE_CLASS = RulesFromDict

    def __init__(
        self,
        rule_dict: dict,
        field_convertor: Optional[type[FieldConvertorProtocol]] = None,
        date_formatter: Optional[type[DateFormatProtocol]] = None,
        data_classes: Optional[list[type[DataclassInstance]]] = None,
        command_class: Optional[type[ProcessCommand]] = None,
    ):
        """
        Args:
            rule_dict: dict handed to ``RULE_CLASS`` to build the rules.
            field_convertor: class used for field conversion; defaults to
                ``DEFAULT_FIELD_CONVERTOR_CLASS``.
            date_formatter: class used for date formatting; defaults to
                ``DEFAULT_DATE_FORMAT_CLASS``.
            data_classes: dataclasses to register with ``DATA_CLASS_PROCESSOR``.
            command_class: class used for command rules; defaults to
                ``COMMAND_CLASS``.
        """
        # The base initializer performs the same setup and builds the rules via
        # self.RULE_CLASS (RulesFromDict here), so delegate instead of
        # duplicating it. Only the name/type of the first argument differs.
        super().__init__(
            rule_source=rule_dict,  # type: ignore[arg-type]
            field_convertor=field_convertor,
            date_formatter=date_formatter,
            data_classes=data_classes,
            command_class=command_class,
        )