# Source code for mt940.processors

"""
Module Processors

This module contains pre- and post-processors for modifying tag
dictionaries in MT940 processing. It provides functions for currency
addition, date fix-up, transaction code extraction, transaction details
parsing, and segment joining for transaction details.
"""

from __future__ import annotations

import calendar
import collections
import functools
import re
from typing import TYPE_CHECKING, Any

from ._types import PostProcessor, PreProcessor

if TYPE_CHECKING:
    from . import models, tags



[docs]
def add_currency_pre_processor(
    currency: str,
    overwrite: bool = True,
) -> PreProcessor:
    """
    Build a pre-processor that stamps a currency onto tag dictionaries.

    Args:
        currency: The value stored under the ``'currency'`` key.
        overwrite: When true (the default) an existing ``'currency'`` entry
            is replaced; when false the key is only filled in if missing.

    Returns:
        A pre-processor function that sets the currency on the tag
        dictionary it receives and returns that same dictionary.
    """

    def _add_currency_pre_processor(
        transactions: models.Transactions,
        tag: tags.Tag,
        tag_dict: dict[str, Any],
        *args: Any,
    ) -> dict[str, Any]:
        if overwrite:
            # Unconditionally replace whatever currency was parsed.
            tag_dict['currency'] = currency
        else:
            # Keep an already present currency, only fill the gap.
            tag_dict.setdefault('currency', currency)
        return tag_dict

    return _add_currency_pre_processor




[docs]
def date_fixup_pre_processor(
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Clamp an impossible February day to the last day of that month.

    Only entries whose ``'month'`` is exactly ``'02'`` are inspected; every
    other tag dictionary is returned untouched. The ``'year'`` value is
    passed to :func:`calendar.monthrange` as parsed, without adding a
    century.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed (unused).
        tag_dict: The tag dictionary holding ``'year'``, ``'month'`` and
            ``'day'`` as decimal strings.

    Returns:
        The same tag dictionary, with ``'day'`` replaced by the string form
        of the last valid February day when it was too large.
    """
    if tag_dict['month'] != '02':
        return tag_dict

    february_length = calendar.monthrange(int(tag_dict['year'], 10), 2)[1]
    if int(tag_dict['day'], 10) > february_length:
        tag_dict['day'] = str(february_length)
    return tag_dict




[docs]
def date_cleanup_post_processor(
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    result: dict[str, Any],
) -> dict[str, Any]:
    """
    Drop the individual date components from the result dictionary.

    The keys 'day', 'month', 'year', 'entry_day' and 'entry_month' are
    removed in place; keys that are not present are silently skipped.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed (unused).
        tag_dict: The tag dictionary (unused).
        result: The result dictionary to clean up.

    Returns:
        The same result dictionary without the date component keys.
    """
    date_parts = ('day', 'month', 'year', 'entry_day', 'entry_month')
    for part in date_parts:
        if part in result:
            del result[part]
    return result




[docs]
def mBank_set_transaction_code(  # noqa: N802
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Store the leading numeric code of the tag value as 'transaction_code'.

    mBank Collect uses transaction code 911 to distinguish incoming mass
    payments transactions, adding transaction_code may be helpful in further
    processing.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed; its ``slug`` selects the value in
            ``tag_dict``.
        tag_dict: The tag dictionary.

    Returns:
        The same tag dictionary with an integer 'transaction_code' entry.

    Raises:
        ValueError: If the text before the first ';' and first space is not
            a valid integer.
    """
    raw_value = tag_dict[tag.slug]
    # Everything before the first ';' is the leading field ...
    leading_field = raw_value.partition(';')[0]
    # ... and its first space-separated token is the numeric code.
    code_text = leading_field.partition(' ')[0]
    tag_dict['transaction_code'] = int(code_text)
    return tag_dict



# Regular expression to extract the IPH ID from mBank tag values.
# It matches the literal ' ID IPH: ', skips any run of 'X' characters and
# captures the zero to fourteen digits that directly precede the closing ';'
# as group ``iph_id``. Because zero digits are allowed, the captured value
# may be an empty string.
iph_id_re = re.compile(r' ID IPH: X*(?P<iph_id>\d{0,14});')



[docs]
def mBank_set_iph_id(  # noqa: N802
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Store the IPH ID found in the tag value as 'iph_id'.

    mBank Collect uses ID IPH to distinguish between virtual accounts,
    adding iph_id may be helpful in further processing.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed; its ``slug`` selects the value in
            ``tag_dict``.
        tag_dict: The tag dictionary.

    Returns:
        The same tag dictionary; 'iph_id' is only set when the value
        contains an ID IPH marker.
    """
    found = iph_id_re.search(tag_dict[tag.slug])
    if found is not None:  # pragma: no branch
        tag_dict['iph_id'] = found['iph_id']
    return tag_dict



# Regular expression to extract the Transaction Number (TNR) from tag
# values. 'TNR:' must be followed by exactly one space or newline and then
# the number itself, captured as group ``tnr`` in the form <digits>.<digits>.
# NOTE(review): the pattern has no '^' or '$' anchors, so re.MULTILINE does
# not change what it matches.
tnr_re = re.compile(r'TNR:[ \n](?P<tnr>\d+\.\d+)', flags=re.MULTILINE)



[docs]
def mBank_set_tnr(  # noqa: N802
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    *args: Any,
) -> dict[str, Any]:
    """
    Store the transaction number (TNR) found in the tag value as 'tnr'.

    mBank Collect states TNR in transaction details as unique id for
    transactions, that may be used to identify the same transactions in
    different statement files, e.g. partial mt942 and full mt940.
    Information about TNR uniqueness has been obtained from mBank support;
    it is missing from the mt940 mBank specification.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed; its ``slug`` selects the value in
            ``tag_dict``.
        tag_dict: The tag dictionary.

    Returns:
        The same tag dictionary; 'tnr' is only set when the value contains
        a TNR marker.
    """
    found = tnr_re.search(tag_dict[tag.slug])
    if found is not None:  # pragma: no branch
        tag_dict['tnr'] = found['tnr']
    return tag_dict



# https://www.db-bankline.deutsche-bank.com/download/MT940_Deutschland_Structure2002.pdf
# Maps the two-character segment identifiers produced by _parse_segments to
# the result key their content is collected under. The empty identifier is
# the text in front of the first '?' marker. '31' and '32' share one result
# key, so their contents end up joined together. Identifiers that are not
# listed here ('33', other identifiers starting with '2', '61'-'65') are
# routed by _process_segments.
DETAIL_KEYS = {
    '': 'transaction_code',
    '00': 'posting_text',
    '10': 'prima_nota',
    '20': 'purpose',
    '30': 'applicant_bin',
    '31': 'applicant_name',
    '32': 'applicant_name',
    '34': 'return_debit_notes',
    '35': 'recipient_name',
    '60': 'additional_purpose',
}

# https://www.hettwer-beratung.de/sepa-spezialwissen/sepa-technische-anforderungen/sepa-gesch%C3%A4ftsvorfallcodes-gvc-mt-940/
# Maps GVC keys to the result key their value is stored under.
# _parse_mt940_gvcodes recognises a key as the four characters directly in
# front of a '+', which is why 'BIC ' carries a trailing space. The empty key
# is the fallback used when no keyed segment is found in the purpose; it
# shares the 'purpose' result key with 'SVWZ'.
GVC_KEYS = {
    '': 'purpose',
    'IBAN': 'gvc_applicant_iban',
    'BIC ': 'gvc_applicant_bin',
    'EREF': 'end_to_end_reference',
    'MREF': 'additional_position_reference',
    'CRED': 'applicant_creditor_id',
    'PURP': 'purpose_code',
    'SVWZ': 'purpose',
    'MDAT': 'additional_position_date',
    'ABWA': 'deviate_applicant',
    'ABWE': 'deviate_recipient',
    'SQTP': 'FRST_ONE_OFF_RECC',
    'ORCR': 'old_SEPA_CI',
    'ORMR': 'old_SEPA_additional_position_reference',
    'DDAT': 'settlement_tag',
    'KREF': 'customer_reference',
    'DEBT': 'debitor_identifier',
    'COAM': 'compensation_amount',
    'OAMT': 'original_amount',
}


def _parse_segments(detail_str: str) -> collections.OrderedDict[str, str]:
    """
    Parse segments from a detail string.

    This function splits the provided detail string into segments using
    the '?' delimiter. Each segment is associated with a two-character
    segment type that follows the '?' marker.

    Args:
        detail_str: A string containing the transaction detail segments.

    Returns:
        An OrderedDict mapping segment identifiers to their extracted content.
    """
    tmp: collections.OrderedDict[str, str] = collections.OrderedDict()
    segment = ''
    segment_type = ''

    for index, char in enumerate(detail_str):
        if char != '?':
            # Accumulate characters into the current segment until a '?'
            # delimiter is encountered.
            segment += char
            continue

        # If there aren't enough characters left to form a segment type,
        # exit the loop.
        if index + 2 >= len(detail_str):
            break

        # Finalize the current segment. If a segment type exists, skip the
        # first two header characters.
        tmp[segment_type] = segment if not segment_type else segment[2:]
        # Extract the new segment type from the following two characters.
        segment_type = detail_str[index + 1] + detail_str[index + 2]
        # Reset the segment accumulator for the next segment.
        segment = ''

    if segment_type:  # pragma: no branch
        # Finalize the last captured segment.
        tmp[segment_type] = segment if not segment_type else segment[2:]

    return tmp


def _process_segments(
    tmp: collections.OrderedDict[str, str],
) -> dict[str, list[str]]:
    """
    Group segment contents by the result key they belong to.

    Args:
        tmp: An OrderedDict of segment identifiers to their content.

    Returns:
        A dictionary mapping result keys to the list of segment contents
        collected for them, in the order the segments appear in ``tmp``.
        Segments with an unrecognised identifier are dropped.
    """
    additional_purpose_types = {'60', '61', '62', '63', '64', '65'}
    grouped: collections.defaultdict[str, list[str]] = (
        collections.defaultdict(list)
    )

    for segment_type, content in tmp.items():
        # Identifiers with a direct mapping (this includes '20' and '60').
        target = DETAIL_KEYS.get(segment_type)
        if target is not None:
            grouped[target].append(content)
            continue

        # '33' continues the applicant name started in '32'.
        if segment_type == '33':
            grouped[DETAIL_KEYS['32']].append(content)
            continue

        # Remaining identifiers starting with '2' continue the purpose.
        if segment_type.startswith('2'):
            # Some banks append a bare ' BIC'/' IBAN' label with no value
            # at the end of a detail segment (issue #109); strip the
            # dangling label so it does not pollute the purpose. The label
            # is matched on the content because segment identifiers are
            # always two characters long. At most one label is removed.
            if content.endswith(' BIC'):
                content = content.removesuffix(' BIC').rstrip()
            elif content.endswith(' IBAN'):
                content = content.removesuffix(' IBAN').rstrip()
            grouped[DETAIL_KEYS['20']].append(content)
            continue

        # '61'-'65' continue the additional purpose started in '60'.
        if segment_type in additional_purpose_types:
            grouped[DETAIL_KEYS['60']].append(content)

    return grouped


def _join_result(
    result: dict[str, list[str]],
    space: bool,
) -> dict[str, str | None]:
    """
    Collapse the collected segment lists into single strings.

    Args:
        result: The result dictionary with lists of strings.
        space: Join the pieces with a single space when true, otherwise
            concatenate them directly.

    Returns:
        A dictionary with one entry per result key named in DETAIL_KEYS.
        Keys without any content, or whose joined content is empty, map to
        None.
    """
    separator = ' ' if space else ''
    return {
        name: separator.join(result.get(name, [])) or None
        for name in DETAIL_KEYS.values()
    }


def _parse_mt940_details(
    detail_str: str,
    space: bool = False,
) -> dict[str, str | None]:
    """
    Parse a structured MT940 detail string into named fields.

    The string is split into segments, the segments are grouped by result
    key, and each group is joined into a single string.

    Args:
        detail_str: The detail string to parse.
        space: Whether to include spaces between segments.

    Returns:
        A dictionary of parsed transaction details.
    """
    return _join_result(
        _process_segments(_parse_segments(detail_str)),
        space,
    )


def _parse_mt940_gvcodes(purpose: str) -> dict[str, str | None]:
    """
    Parse MT940 GVC codes from the purpose string.

    A GVC segment starts at a '+' whose four preceding characters form a
    key of GVC_KEYS; its value is the text up to the next such key. Text in
    front of the first key is discarded. When the purpose contains no key
    at all, the whole string is returned under 'purpose'.

    Args:
        purpose: The purpose string to parse.

    Returns:
        A dictionary with one entry per result key named in GVC_KEYS; keys
        for which nothing was found map to None.
    """
    result: dict[str, str | None] = dict.fromkeys(GVC_KEYS.values())

    tmp: dict[str, str] = {}
    segment_type: str | None = None
    text = ''

    for index, char in enumerate(purpose):
        # A key needs four characters in front of the '+', so a '+' at
        # index 0-3 can never close one. The explicit bound matters: with a
        # negative start the slice (e.g. ``purpose[-3:1]``) wraps around to
        # an empty string, which would match the '' fallback entry of
        # GVC_KEYS and silently throw away the text read so far.
        starts_segment = (
            char == '+'
            and index >= 4
            and purpose[index - 4 : index] in GVC_KEYS
        )
        if not starts_segment:
            text += char
            continue

        if segment_type:
            # Close the running segment; the last four characters collected
            # are the key of the segment that starts here.
            tmp[segment_type] = text[:-4]
        # Text collected before the first key is dropped as well.
        text = ''
        segment_type = purpose[index - 4 : index]

    if segment_type:  # pragma: no branch
        tmp[segment_type] = text
    else:
        # No key found: keep the complete purpose unchanged.
        tmp[''] = text  # pragma: no cover

    for key, value in tmp.items():
        result[GVC_KEYS[key]] = value

    return result



[docs]
def transaction_details_post_processor(
    transactions: models.Transactions,
    tag: tags.Tag,
    tag_dict: dict[str, Any],
    result: dict[str, Any],
    space: bool = False,
) -> dict[str, Any]:
    """
    Expand structured transaction details into separate result fields.

    The details are only parsed when, with line breaks removed, they start
    with three digits followed by '?' and two more digits (e.g. "103?00").
    In that case the parsed fields (such as the 60-65 keys) are merged into
    the result, GVC codes inside the purpose are expanded, and the raw
    'transaction_details' entry is removed. Otherwise the result is
    returned unchanged.

    Args:
        transactions: The transactions object (unused).
        tag: The tag being processed (unused).
        tag_dict: The tag dictionary; must contain 'transaction_details'.
        result: The result dictionary, updated in place.
        space: Whether to include spaces between segments.

    Returns:
        The updated result dictionary.
    """
    raw_details = tag_dict['transaction_details']
    # splitlines() already drops the line terminators, so joining the pieces
    # yields the details as one continuous string.
    details = ''.join(raw_details.splitlines())

    # check for e.g. 103?00...
    if not re.match(r'^\d{3}\?\d{2}', details):
        return result

    result.update(_parse_mt940_details(details, space=space))

    purpose = result.get('purpose')
    if purpose:
        mentions_gvc_key = any(code in purpose for code in GVC_KEYS if code)
        if mentions_gvc_key:
            result.update(_parse_mt940_gvcodes(purpose))

    # Clean up the purpose field: remove a trailing "BIC" label that has no
    # actual BIC value behind it.
    final_purpose = result.get('purpose')
    if final_purpose:
        result['purpose'] = final_purpose.removesuffix(' BIC')

    del result['transaction_details']
    return result



# Pre-bound variant of transaction_details_post_processor with space=True.
# functools.partial objects do not take over the wrapped function's
# docstring, so a dedicated one is assigned right after creation.
transaction_details_post_processor_with_space = functools.partial(
    transaction_details_post_processor, space=True
)
transaction_details_post_processor_with_space.__doc__ = """
A variant of transaction_details_post_processor that includes spaces between
segments.
"""



[docs]
def transactions_to_transaction(
    *keys: str,
) -> PostProcessor:
    """
    Build a post-processor that copies statement-level data to a transaction.

    Args:
        *keys: The keys to copy to the transaction.

    Returns:
        A post-processor function that copies the specified keys.
    """

    def _transactions_to_transaction(
        transactions: models.Transactions,
        tag: tags.Tag,
        tag_dict: dict[str, Any],
        result: dict[str, Any],
    ) -> dict[str, Any]:
        """
        Copy the selected entries of ``transactions.data`` into ``result``.

        Args:
            transactions: The transactions object providing ``data``.
            tag: The tag being processed (unused).
            tag_dict: The tag dictionary (unused).
            result: The result dictionary, updated in place.

        Returns:
            The updated result dictionary.
        """
        # Keys missing from the global data are skipped, so whatever the
        # result already holds for them is left alone.
        result.update(
            (key, transactions.data[key])
            for key in keys
            if key in transactions.data
        )
        return result

    return _transactions_to_transaction