# Source code for mt940.processors

"""
Module Processors

This module contains pre- and post-processors for modifying tag
dictionaries in MT940 processing. It provides functions for currency
addition, date fix-up, transaction code extraction, transaction details
parsing, and segment joining for transaction details.
"""

from __future__ import annotations

import calendar
import collections
import functools
import re
from typing import TYPE_CHECKING, Any

from ._types import PostProcessor, PreProcessor

if TYPE_CHECKING:
    from . import models, tags


def add_currency_pre_processor(
    currency: str,
    overwrite: bool = True,
) -> PreProcessor:
    """
    Build a pre-processor that stamps a currency onto tag dictionaries.

    Args:
        currency: The value to store under the 'currency' key.
        overwrite: When true, replace any currency that is already present;
            when false, only fill in a missing one.

    Returns:
        A pre-processor function that sets the currency on the tag
        dictionary and returns that same dictionary.
    """

    def _add_currency_pre_processor(
        transactions: models.Transactions,
        tag: tags.Tag,
        tag_dict: dict[str, Any],
        *args: Any,
    ) -> dict[str, Any]:
        if overwrite:
            tag_dict['currency'] = currency
        else:
            # Only fills the key in when it is absent.
            tag_dict.setdefault('currency', currency)
        return tag_dict

    return _add_currency_pre_processor
def date_fixup_pre_processor(
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Clamp an out-of-range February day to the last day of that month.

    Only dictionaries whose 'month' equals '02' are touched. For those, the
    'year' and 'day' strings are parsed as base-10 integers and, when the
    day is larger than the number of days February has in that year, 'day'
    is replaced by that number (as a string).

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed (unused).
        tag_dict: The tag dictionary holding 'year', 'month' and 'day'.

    Returns:
        The same tag dictionary, possibly with 'day' adjusted.
    """
    if tag_dict['month'] != '02':
        return tag_dict

    # monthrange returns (weekday of first day, number of days in month).
    last_day = calendar.monthrange(int(tag_dict['year'], 10), 2)[1]
    if int(tag_dict['day'], 10) > last_day:
        tag_dict['day'] = str(last_day)

    return tag_dict
def date_cleanup_post_processor(
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    result: dict[str, Any],
) -> dict[str, Any]:
    """
    Drop the individual date components from the result dictionary.

    The keys 'day', 'month', 'year', 'entry_day' and 'entry_month' are
    removed when present; missing keys are ignored.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed (unused).
        tag_dict: The tag dictionary (unused).
        result: The result dictionary to clean up in place.

    Returns:
        The same result dictionary without the date component keys.
    """
    date_keys = ('day', 'month', 'year', 'entry_day', 'entry_month')
    for date_key in date_keys:
        if date_key in result:
            del result[date_key]

    return result
def mBank_set_transaction_code(  # noqa: N802
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Store the leading numeric transaction code under 'transaction_code'.

    mBank Collect uses transaction code 911 to distinguish incoming mass
    payments transactions, adding transaction_code may be helpful in further
    processing.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed; its ``slug`` names the value to read.
        tag_dict: The tag dictionary.

    Returns:
        The same tag dictionary with 'transaction_code' set to an int.

    Raises:
        ValueError: If the leading token is not a valid integer.
    """
    raw_value = tag_dict[tag.slug]

    # The code is whatever precedes the first ';', cut again at the first
    # space inside that part.
    before_semicolon = raw_value.partition(';')[0]
    code_text = before_semicolon.partition(' ')[0]

    tag_dict['transaction_code'] = int(code_text)
    return tag_dict
# Pattern that captures the digits of an " ID IPH: ...;" entry in mBank tag
# values; any leading 'X' padding before the digits is skipped.
iph_id_re = re.compile(r' ID IPH: X*(?P<iph_id>\d{0,14});')


def mBank_set_iph_id(  # noqa: N802
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Store the IPH id found in the tag value under 'iph_id'.

    mBank Collect uses ID IPH to distinguish between virtual accounts,
    adding iph_id may be helpful in further processing.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed; its ``slug`` names the value to read.
        tag_dict: The tag dictionary.

    Returns:
        The same tag dictionary; 'iph_id' is only added when the pattern
        is found in the value.
    """
    found = iph_id_re.search(tag_dict[tag.slug])
    if found is not None:
        tag_dict['iph_id'] = found.group('iph_id')

    return tag_dict
# Pattern that captures the transaction number (TNR) from tag values; the
# separator after "TNR:" may be either a space or a newline.
tnr_re = re.compile(r'TNR:[ \n](?P<tnr>\d+\.\d+)', flags=re.MULTILINE)


def mBank_set_tnr(  # noqa: N802
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Store the TNR found in the tag value under 'tnr'.

    mBank Collect states TNR in transaction details as unique id for
    transactions, that may be used to identify the same transactions in
    different statement files eg. partial mt942 and full mt940.
    Information about TNR uniqueness has been obtained from mBank support,
    it lacks in mt940 mBank specification.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed; its ``slug`` names the value to read.
        tag_dict: The tag dictionary.

    Returns:
        The same tag dictionary; 'tnr' is only added when the pattern is
        found in the value.
    """
    found = tnr_re.search(tag_dict[tag.slug])
    if found is not None:
        tag_dict['tnr'] = found.group('tnr')

    return tag_dict
# https://www.db-bankline.deutsche-bank.com/download/MT940_Deutschland_Structure2002.pdf DETAIL_KEYS = { '': 'transaction_code', '00': 'posting_text', '10': 'prima_nota', '20': 'purpose', '30': 'applicant_bin', '31': 'applicant_name', '32': 'applicant_name', '34': 'return_debit_notes', '35': 'recipient_name', '60': 'additional_purpose', } # https://www.hettwer-beratung.de/sepa-spezialwissen/sepa-technische-anforderungen/sepa-gesch%C3%A4ftsvorfallcodes-gvc-mt-940/ GVC_KEYS = { '': 'purpose', 'IBAN': 'gvc_applicant_iban', 'BIC ': 'gvc_applicant_bin', 'EREF': 'end_to_end_reference', 'MREF': 'additional_position_reference', 'CRED': 'applicant_creditor_id', 'PURP': 'purpose_code', 'SVWZ': 'purpose', 'MDAT': 'additional_position_date', 'ABWA': 'deviate_applicant', 'ABWE': 'deviate_recipient', 'SQTP': 'FRST_ONE_OFF_RECC', 'ORCR': 'old_SEPA_CI', 'ORMR': 'old_SEPA_additional_position_reference', 'DDAT': 'settlement_tag', 'KREF': 'customer_reference', 'DEBT': 'debitor_identifier', 'COAM': 'compensation_amount', 'OAMT': 'original_amount', } def _parse_segments(detail_str: str) -> collections.OrderedDict[str, str]: """ Parse segments from a detail string. This function splits the provided detail string into segments using the '?' delimiter. Each segment is associated with a two-character segment type that follows the '?' marker. Args: detail_str: A string containing the transaction detail segments. Returns: An OrderedDict mapping segment identifiers to their extracted content. """ tmp: collections.OrderedDict[str, str] = collections.OrderedDict() segment = '' segment_type = '' for index, char in enumerate(detail_str): if char != '?': # Accumulate characters into the current segment until a '?' # delimiter is encountered. segment += char continue # If there aren't enough characters left to form a segment type, # exit the loop. if index + 2 >= len(detail_str): break # Finalize the current segment. If a segment type exists, skip the # first two header characters. 
tmp[segment_type] = segment if not segment_type else segment[2:] # Extract the new segment type from the following two characters. segment_type = detail_str[index + 1] + detail_str[index + 2] # Reset the segment accumulator for the next segment. segment = '' if segment_type: # pragma: no branch # Finalize the last captured segment. tmp[segment_type] = segment if not segment_type else segment[2:] return tmp def _process_segments( tmp: collections.OrderedDict[str, str], ) -> dict[str, list[str]]: """ Process segments into result dictionary. Args: tmp: An OrderedDict of segment types to their content. Returns: A dictionary mapping keys to lists of segment contents. """ result: collections.defaultdict[str, list[str]] = collections.defaultdict( list ) for key, value in tmp.items(): if key in DETAIL_KEYS: result[DETAIL_KEYS[key]].append(value) elif key == '33': key32 = DETAIL_KEYS['32'] result[key32].append(value) elif key.startswith('2'): # Some banks append a bare ' BIC'/' IBAN' label with no value at # the end of a detail segment (issue #109); strip the dangling # label so it does not pollute the purpose. Segment keys are # always two characters (see _parse_segments), so the historical # '29'/'28D' key checks could never match the IBAN case -- the # label is matched on the value instead. for label in (' BIC', ' IBAN'): if value.endswith(label): value = value.removesuffix(label).rstrip() break key20 = DETAIL_KEYS['20'] result[key20].append(value) elif key in {'60', '61', '62', '63', '64', '65'}: key60 = DETAIL_KEYS['60'] result[key60].append(value) return result def _join_result( result: dict[str, list[str]], space: bool, ) -> dict[str, str | None]: """ Join result lists into strings. Args: result: The result dictionary with lists of strings. space: Whether to include spaces between segments. Returns: A dictionary with joined strings. 
""" joined_result: dict[str, str | None] = {} for key in DETAIL_KEYS.values(): if space: value = ' '.join(result.get(key, [])) else: value = ''.join(result.get(key, [])) joined_result[key] = value or None return joined_result def _parse_mt940_details( detail_str: str, space: bool = False, ) -> dict[str, str | None]: """ Parse MT940 transaction details. Args: detail_str: The detail string to parse. space: Whether to include spaces between segments. Returns: A dictionary of parsed transaction details. """ tmp = _parse_segments(detail_str) result = _process_segments(tmp) return _join_result(result, space) def _parse_mt940_gvcodes(purpose: str) -> dict[str, str | None]: """ Parse MT940 GVC codes from the purpose string. Args: purpose: The purpose string to parse. Returns: A dictionary of parsed GVC codes. """ result: dict[str, str | None] = dict.fromkeys(GVC_KEYS.values()) tmp: dict[str, str] = {} segment_type: str | None = None text = '' for index, char in enumerate(purpose): # Detect the beginning of a GVC segment: if a '+' is encountered # and the four characters preceding it form a valid GVC key. if char == '+' and purpose[index - 4 : index] in GVC_KEYS: if segment_type: # If already processing a segment, finalize it by removing # the trailing GVC key and reset the text accumulator. tmp[segment_type] = text[:-4] text = '' else: text = '' # Set the new segment type from the four characters preceding # the '+'. segment_type = purpose[index - 4 : index] else: text += char if segment_type: # pragma: no branch tmp[segment_type] = text else: tmp[''] = text # pragma: no cover for key, value in tmp.items(): result[GVC_KEYS[key]] = value return result
def transaction_details_post_processor(
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    result: dict[str, Any],
    space: bool = False,
) -> dict[str, Any]:
    """
    Expand structured transaction details into separate result keys.

    The 'transaction_details' text of the tag dictionary is flattened to a
    single line. When it starts with three digits, a '?' and two digits
    (e.g. ``103?00``) it is parsed into detail fields such as the 60-65
    keys, which are merged into the result. If the parsed purpose mentions
    any GVC key, the purpose is parsed further for GVC codes. Finally a
    trailing ' BIC' is removed from the purpose and the raw
    'transaction_details' entry is deleted from the result. Details that do
    not have the structured form leave the result untouched.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed (unused).
        tag_dict: The tag dictionary holding 'transaction_details'.
        result: The result dictionary to update in place.
        space: Whether to include spaces between segments.

    Returns:
        The updated result dictionary.
    """
    raw_details = tag_dict['transaction_details']
    flattened = ''.join(
        line.strip('\n\r') for line in raw_details.splitlines()
    )

    # Structured details look like e.g. 103?00...
    if re.match(r'^\d{3}\?\d{2}', flattened) is None:
        return result

    result.update(_parse_mt940_details(flattened, space=space))

    purpose = result.get('purpose')
    # The empty key of GVC_KEYS is skipped: it would match any string.
    if purpose and any(gvk and gvk in purpose for gvk in GVC_KEYS):
        result.update(_parse_mt940_gvcodes(purpose))

    # Clean up the purpose field: remove a trailing "BIC" that has no
    # actual BIC value behind it.
    final_purpose = result.get('purpose')
    if final_purpose:
        result['purpose'] = final_purpose.removesuffix(' BIC')

    del result['transaction_details']
    return result
# Pre-bound variant of transaction_details_post_processor that always joins
# the detail segments with a space.
transaction_details_post_processor_with_space = functools.partial(
    transaction_details_post_processor,
    space=True,
)
transaction_details_post_processor_with_space.__doc__ = (
    ' A variant of transaction_details_post_processor that includes '
    'spaces between segments. '
)
def transactions_to_transaction(
    *keys: str,
) -> PostProcessor:
    """
    Build a post-processor that copies global details onto a transaction.

    Args:
        *keys: The keys to copy from the transactions data.

    Returns:
        A post-processor function that copies the specified keys.
    """

    def _transactions_to_transaction(
        transactions: models.Transactions,
        tag: tags.Tag,
        tag_dict: dict[str, Any],
        result: dict[str, Any],
    ) -> dict[str, Any]:
        """
        Copy the selected global transactions details into the result.

        Args:
            transactions: The transactions object; its ``data`` mapping is
                the source of the copied values.
            tag: The tag being processed (unused).
            tag_dict: The tag dictionary (unused).
            result: The result dictionary to update in place.

        Returns:
            The updated result dictionary.
        """
        shared = transactions.data
        # Keys that are missing from the global data are skipped.
        result.update({key: shared[key] for key in keys if key in shared})
        return result

    return _transactions_to_transaction