Add importer for Mollie invoices

This commit is contained in:
polyfloyd 2025-06-27 17:15:54 +02:00
parent 36115b452a
commit 3351e2d0d4
8 changed files with 154 additions and 34 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
__pycache__
.mypy_cache

18
Uitgaven.beancount Normal file
View file

@ -0,0 +1,18 @@
option "name_assets" "Activa"
option "name_equity" "Vermogen"
option "name_expenses" "Uitgaven"
option "name_income" "Inkomsten"
option "name_liabilities" "Passiva"
option "account_previous_balances" "Openingsbalans"
2025-04-30 * "Mollie B.V." "iDeal transactiekosten" ^MOL-NL-R2025.0000581421
Uitgaven:Bankkosten 7.74 EUR
Passiva:RevBank -7.74 EUR
2025-04-30 document Uitgaven:Bankkosten "docs/Uitgaven/Bankkosten/2025-04-30.MOL-NL-R2025.0000581421.pdf" ^MOL-NL-R2025.0000581421
2025-05-31 * "Mollie B.V." "iDeal transactiekosten" ^MOL-NL-R2025.0000653802
Uitgaven:Bankkosten 17.04 EUR
Passiva:RevBank -17.04 EUR
2025-05-31 document Uitgaven:Bankkosten "docs/Uitgaven/Bankkosten/2025-05-31.MOL-NL-R2025.0000653802.pdf" ^MOL-NL-R2025.0000653802

View file

@ -50,5 +50,6 @@ plugin "beancount_periodic.recur" "{'generate_until':'2025-05-31'}"
include "Activa/Betaalrekening.beancount"
include "Activa/Debiteuren/Huurders.beancount"
include "Activa/Debiteuren/Deelnemers.beancount"
include "Uitgaven.beancount"
include "reimburse.beancount"

47
import/__main__.py Executable file
View file

@ -0,0 +1,47 @@
#!/usr/bin/env python3
from beangulp import Ingest
from beangulp.testing import main
from beancount.core.data import Transaction, Posting
import rabobank
import pdf
def classify_contra(guess_contra, primary_account):
    """Build an ingest hook that adds a contra posting to extracted entries.

    Args:
        guess_contra: Callable that receives a transaction and returns the
            contra account name, or a falsy value when it cannot decide.
        primary_account: Only transactions whose first posting is booked on
            this account are extended; every other entry passes through
            untouched.

    Returns:
        A hook ``(extracted_entries_list, ledger_entries) -> list`` that
        returns the ``(filename, entries, account, importer)`` tuples, with
        the matching transactions extended in place.
    """

    def _classify(entry):
        # Non-transactions and transactions without postings are left alone;
        # the latter would otherwise fail on the postings[0] lookup below.
        if not isinstance(entry, Transaction) or not entry.postings:
            return entry
        posting = entry.postings[0]
        if posting.account != primary_account:
            return entry
        contra = guess_contra(entry)
        entry.postings.append(
            Posting(
                # Unknown contra accounts land on a placeholder account ...
                contra or "Inkomsten:TODO",
                -posting.units,
                posting.cost,
                posting.price,
                # ... and the posting is flagged "!" so it stands out.
                None if contra else "!",
                None,
            )
        )
        return entry

    def _inner(extracted_entries_list, ledger_entries):
        return [
            (filename, [_classify(entry) for entry in entries], account, importer)
            for filename, entries, account, importer in extracted_entries_list
        ]

    return _inner
if __name__ == "__main__":
    # Importers handed to beangulp: the Rabobank importer for the bank
    # account and the Mollie invoice PDF importer.
    importers = [
        rabobank.Importer("Activa:Betaalrekening", "EUR"),
        pdf.MollieInvoiceImporter(),
    ]
    # Hook that appends a contra posting to transactions whose first posting
    # is on the bank account.
    hooks = [classify_contra(rabobank.guess_contra, "Activa:Betaalrekening")]
    # Give the Ingest instance its own name instead of rebinding `main`,
    # which this module imports from beangulp.testing.
    ingest = Ingest(importers, hooks)
    ingest()

86
import/pdf.py Normal file
View file

@ -0,0 +1,86 @@
from pypdf import PdfReader
from beangulp import mimetypes, Importer
from beangulp.cache import cache
import re
from datetime import date
from beancount.core.data import Transaction, Posting, Document
from beancount.core import flags
from beancount.core import data
from beancount.core import amount
from beancount.core.number import D
@cache
def pdf_to_text(filename):
    """Return the text extracted from the first page of the PDF at *filename*.

    Only the first page is read; text on any later page is not returned.
    The function is wrapped in beangulp's ``cache`` decorator.
    """
    return PdfReader(filename).pages[0].extract_text()
class MollieInvoiceImporter(Importer):
    """Importer for PDF invoices issued by Mollie B.V.

    For each invoice, ``extract`` yields one transaction that books the
    invoice total on the expense account against ``Passiva:RevBank``, plus a
    document directive. Both carry the invoice reference as a link.
    """

    def _find_line(self, filepath: str, pattern: str, what: str):
        """Return the regex match for the first text line matching *pattern*.

        Args:
            filepath: Path of the invoice PDF.
            pattern: Regular expression searched for in each line.
            what: Name of the field, used in the error message.

        Raises:
            ValueError: If no line of the extracted text matches.
        """
        for line in pdf_to_text(filepath).split("\n"):
            if m := re.search(pattern, line):
                return m
        raise ValueError(f"Mollie invoice {what} not found")

    def identify(self, filepath: str) -> bool:
        """Return True for PDF files with a line starting with "Mollie B.V."."""
        mimetype, _encoding = mimetypes.guess_type(filepath)
        if mimetype != "application/pdf":
            return False
        lines = pdf_to_text(filepath).split("\n")
        return any(line.startswith("Mollie B.V.") for line in lines)

    def account(self, filepath: str) -> str:
        """Return the expense account all Mollie invoices are booked on."""
        return "Uitgaven:Bankkosten"

    def tx_ref(self, filepath: str) -> str:
        """Return the invoice reference (``MOL-...``) found in the PDF text.

        Raises:
            ValueError: If the PDF text has no "Invoice reference" line.
        """
        return self._find_line(
            filepath, r"^Invoice reference (MOL-.+)$", "reference"
        )[1]

    def filename(self, filepath: str) -> str:
        """Return the file name for the invoice: ``<reference>.pdf``."""
        return f"{self.tx_ref(filepath)}.pdf"

    def date(self, filepath: str):
        """Return the invoice date parsed from the "Invoice date" line.

        The PDF states the date as day-month-year (``DD-MM-YYYY``).

        Raises:
            ValueError: If the PDF text has no "Invoice date" line.
        """
        m = self._find_line(
            filepath, r"^Invoice date (\d{2})-(\d{2})-(\d{4})$", "date"
        )
        # Inside a method body `date` resolves to the module-level
        # datetime.date, not to this method.
        return date(int(m[3]), int(m[2]), int(m[1]))

    def extract(self, filepath: str, existing) -> list:
        """Return ``[transaction, document]`` for the invoice at *filepath*.

        Args:
            filepath: Path of the invoice PDF.
            existing: Entries already in the ledger; not used here.

        Raises:
            ValueError: If the total, reference or date line is missing.
        """
        total = D(
            self._find_line(
                filepath, r"^Total including VAT ([\d.]+) EUR$", "total"
            )[1]
        )

        contra = "Passiva:RevBank"
        expense_account = self.account(filepath)
        name = self.filename(filepath)
        # Named invoice_date so the datetime.date import is not shadowed.
        invoice_date = self.date(filepath)
        link = self.tx_ref(filepath)
        units = amount.Amount(total, "EUR")

        doc = Document(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            account=expense_account,
            filename=f"docs/Uitgaven/Bankkosten/{invoice_date}.{name}",
            tags=set(),
            links={link},
        )
        tx = Transaction(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            flag=flags.FLAG_OKAY,
            payee="Mollie B.V.",
            narration="iDeal transactiekosten",
            tags=set(),
            links={link},
            postings=[
                Posting(expense_account, units, None, None, None, None),
                Posting(contra, -units, None, None, None, None),
            ],
        )
        return [tx, doc]

34
import → import/rabobank.py Executable file → Normal file
View file

@ -1,11 +1,8 @@
#!/usr/bin/env python3
from os.path import basename
from beangulp import Ingest
from beangulp import mimetypes
from beangulp.importers import csvbase
from beangulp.testing import main
from beancount.core.data import Transaction, Posting
from deelnemers import deelnemers
@ -121,34 +118,3 @@ def guess_contra(entry):
return f"Activa:Debiteuren:Deelnemers:{p.nickname.title().replace('_', '')}"
return None
def classify_hook(extracted_entries_list, ledger_entries):
    """Ingest hook that appends a contra posting to every transaction.

    Args:
        extracted_entries_list: List of ``(filename, entries, account,
            importer)`` tuples.
        ledger_entries: Entries of the existing ledger; not used here.

    Returns:
        A list of the same tuples, with each transaction extended in place by
        a posting that negates its first posting.
    """

    def _classify(entry):
        # Non-transactions and transactions without postings are left alone;
        # the latter would otherwise fail on the postings[0] lookup below.
        if not isinstance(entry, Transaction) or not entry.postings:
            return entry
        contra = guess_contra(entry)
        posting = entry.postings[0]
        entry.postings.append(
            Posting(
                # Unknown contra accounts land on a placeholder account ...
                contra or "Inkomsten:TODO",
                -posting.units,
                posting.cost,
                posting.price,
                # ... and the posting is flagged "!" so it stands out.
                None if contra else "!",
                None,
            )
        )
        return entry

    return [
        (filename, [_classify(entry) for entry in entries], account, importer)
        for filename, entries, account, importer in extracted_entries_list
    ]
if __name__ == "__main__":
    importers = [Importer("Activa:Betaalrekening", "EUR")]
    hooks = [classify_hook]
    # Give the Ingest instance its own name instead of rebinding `main`,
    # which this module imports from beangulp.testing.
    ingest = Ingest(importers, hooks)
    ingest()