# Source code for mt940.parser
"""
Format
---------------------
Sources:
.. _Swift for corporates: http://www.sepaforcorporates.com/\
swift-for-corporates/account-statement-mt940-file-format-overview/
.. _Rabobank MT940: https://www.rabobank.nl/images/\
formaatbeschrijving_swift_bt940s_1_0_nl_rib_29539296.pdf
- `Swift for corporates`_
- `Rabobank MT940`_
::
[] = optional
! = fixed length
a = Text
x = Alphanumeric, seems more like text actually. Can include special
characters (slashes) and whitespace as well as letters and numbers
d = Numeric separated by decimal (usually comma)
c = Code list value
n = Numeric
"""
from __future__ import annotations
import os
import re
from collections.abc import Iterable
from typing import TYPE_CHECKING, Any
import mt940
if TYPE_CHECKING:
from ._types import Processors, Source
from .models import Transactions
def _read(src: Any, encoding: str | None = None) -> str:
    """Read raw mt940 data from a file handle, path or string and decode it.

    The source is resolved in this order: an object with a ``read()``
    method is read, the path of an existing file is opened in binary mode
    and read, and anything else is taken to be the data itself.

    Data that exposes a ``decode`` method is decoded by trying ``encoding``
    (when given) followed by ``utf-8``, ``cp852``, ``iso8859-15`` and
    ``latin1``; the first codec that succeeds wins.

    Args:
        src: A readable object, the path of an existing file, or the raw
            data as ``str``/``bytes``.
        encoding: Optional codec to try before the built-in fallbacks.

    Returns:
        The decoded data as ``str``.

    Raises:
        UnicodeDecodeError: If every candidate codec fails; the error from
            the last attempt is raised.
        TypeError: If the resolved data is not text and could not be
            decoded into text.
    """

    def safe_is_file(filename: Any) -> bool:
        # Raw statement data is also passed through this check, so a
        # ValueError from the path lookup simply means "not a file".
        try:
            return os.path.isfile(filename)
        except ValueError:  # pragma: no cover
            return False

    if hasattr(src, 'read'):  # pragma: no branch
        data = src.read()
    elif safe_is_file(src):
        with open(src, 'rb') as fh:
            data = fh.read()
    else:  # pragma: no cover
        data = src

    if hasattr(data, 'decode'):  # pragma: no branch
        exception = None
        encodings = [encoding, 'utf-8', 'cp852', 'iso8859-15', 'latin1']
        for enc in encodings:  # pragma: no cover
            if not enc:
                # No explicit encoding was supplied; skip the empty slot.
                continue
            try:
                data = data.decode(enc)
                break
            except UnicodeDecodeError as e:
                # Remember the failure and fall through to the next codec.
                exception = e
            except UnicodeEncodeError:
                # Stop trying codecs; the type check below reports the
                # still-undecoded data.
                break
        else:  # pragma: no cover
            # Only reached when every codec raised UnicodeDecodeError, so
            # an exception has been recorded (assert narrows the type).
            assert exception is not None
            raise exception

    # Explicit check instead of ``assert`` so unsupported input is still
    # rejected when assertions are disabled (``python -O``).
    if not isinstance(data, str):
        raise TypeError(
            'expected a readable object, an existing file path or '
            f'str/bytes data, got {type(data).__name__}'
        )
    return data
# [docs]
def parse(
    src: Source,
    encoding: str | None = None,
    processors: Processors | None = None,
    tags: dict[int | str, mt940.tags.Tag] | None = None,
    transaction_boundary: Iterable[str] | None = None,
) -> Transactions:
    """Read MT940 input and parse it into one combined collection.

    The source is read and decoded first; a fresh
    :class:`~mt940.models.Transactions` is then created with the given
    options and fed the decoded text.

    Args:
        src: A file handle, a filename to read, or the raw data as
            ``str``/``bytes``.
        encoding: Optional encoding override for byte input.
        processors: Optional extra pre/post processors.
        tags: Optional extra or overriding tag parsers.
        transaction_boundary: Optional iterable of tag *slugs* that each
            start a new transaction (issue #110). By default only ``:61:``
            starts a transaction; pass e.g.
            ``{'transaction_reference_number'}`` to also start one on every
            ``:20:``. Omit it to keep the legacy behaviour.

    Returns:
        The parsed collection of transactions.
    """
    raw = _read(src, encoding)
    result = mt940.models.Transactions(
        processors,
        tags,
        transaction_boundary=transaction_boundary,
    )
    result.parse(raw)
    return result
# [docs]
def parse_statements(
    src: Source,
    encoding: str | None = None,
    processors: Processors | None = None,
    tags: dict[int | str, mt940.tags.Tag] | None = None,
    transaction_boundary: Iterable[str] | None = None,
) -> list[Transactions]:
    """Parse an mt940 file holding several statement blocks, one by one.

    Where :func:`parse` merges everything into a single
    :class:`~mt940.models.Transactions`, this function cuts the input at
    every line that begins with ``:20:`` and parses each resulting block
    into its own :class:`~mt940.models.Transactions`. It is meant for files
    that concatenate several statements (e.g. balance-only blocks), where a
    single ``Transactions`` would only keep the last block's
    statement-level data such as the opening/closing/available balances
    (issue #107).

    Every ``:20:`` is taken as the start of a new statement, matching the
    standard where ``:20:`` is the once-per-statement transaction
    reference. That makes this function mutually exclusive with
    ``transaction_boundary={'transaction_reference_number'}`` (issue #110),
    which instead treats ``:20:`` as an *intra*-statement transaction
    boundary; the two target different, non-standard bank formats -- don't
    combine them.

    Args:
        src: A file handle, a filename to read, or the raw data as
            ``str``/``bytes``.
        encoding: Optional encoding override for byte input.
        processors: Optional extra pre/post processors (applied per block).
        tags: Optional extra or overriding tag parsers (applied per block).
        transaction_boundary: Optional iterable of tag slugs that each
            start a new transaction; forwarded unchanged to every block's
            :class:`~mt940.models.Transactions`.

    Returns:
        One :class:`~mt940.models.Transactions` per statement block.
    """

    def parse_block(text: str) -> Transactions:
        # Each block gets its own collection built from the same options.
        statement = mt940.models.Transactions(
            processors,
            tags,
            transaction_boundary=transaction_boundary,
        )
        statement.parse(text)
        return statement

    # Zero-width split: the lookahead keeps each ``:20:`` at the head of
    # the block it introduces.
    chunks = re.split(r'(?m)^(?=:20:)', _read(src, encoding))

    # Anything not starting with ``:20:`` (a leading header or the empty
    # chunk before the first statement) is dropped.
    return [
        parse_block(chunk)
        for chunk in chunks
        if chunk.strip().startswith(':20:')
    ]