Add importer for Mollie invoices

This commit is contained in:
polyfloyd 2025-06-27 17:15:54 +02:00
parent 36115b452a
commit 3351e2d0d4
8 changed files with 154 additions and 34 deletions

86
import/pdf.py Normal file
View file

@ -0,0 +1,86 @@
from pypdf import PdfReader
from beangulp import mimetypes, Importer
from beangulp.cache import cache
import re
from datetime import date
from beancount.core.data import Transaction, Posting, Document
from beancount.core import flags
from beancount.core import data
from beancount.core import amount
from beancount.core.number import D
@cache
def pdf_to_text(filename):
    """Return the text extracted from the first page of the PDF *filename*.

    Only page one is read; any further pages are ignored. The function is
    wrapped in the ``cache`` decorator imported from ``beangulp.cache``.
    """
    first_page = PdfReader(filename).pages[0]
    return first_page.extract_text()
class MollieInvoiceImporter(Importer):
    """Importer for PDF invoices issued by Mollie B.V.

    Every value is scraped from the text of the invoice's first page (via
    ``pdf_to_text``): the invoice reference, the invoice date and the total
    including VAT. ``extract`` turns these into one transaction that books
    the total on ``Uitgaven:Bankkosten`` against ``Passiva:RevBank`` and one
    document directive for the filed PDF, both linked by the invoice
    reference.
    """

    # Line patterns as they are expected in the extracted invoice text.
    _REFERENCE_RE = re.compile(r"^Invoice reference (MOL-.+)$")
    _DATE_RE = re.compile(r"^Invoice date (\d{2})-(\d{2})-(\d{4})$")
    _TOTAL_RE = re.compile(r"^Total including VAT ([\d.]+) EUR$")

    @staticmethod
    def _find_line(filepath, pattern, description):
        """Return the match object for the first text line matching *pattern*.

        Args:
            filepath: Path of the invoice PDF.
            pattern: Compiled regular expression applied to each line.
            description: Name of the field, used in the error message.

        Raises:
            ValueError: If no line of the extracted text matches.
        """
        for line in pdf_to_text(filepath).split("\n"):
            if found := pattern.search(line):
                return found
        raise ValueError(f"Mollie invoice {description} not found")

    def identify(self, filepath):
        """Return True if *filepath* is a PDF with a line starting "Mollie B.V."."""
        mimetype, _encoding = mimetypes.guess_type(filepath)
        # Check the cheap mimetype guess first so non-PDF files are never parsed.
        if mimetype != "application/pdf":
            return False
        return any(
            line.startswith("Mollie B.V.")
            for line in pdf_to_text(filepath).split("\n")
        )

    def account(self, filepath):
        """Return the account the invoice is booked on (same for every file)."""
        return "Uitgaven:Bankkosten"

    def tx_ref(self, filepath):
        """Return the invoice reference (``MOL-...``) found in the invoice text.

        Raises:
            ValueError: If the text has no "Invoice reference" line.
        """
        return self._find_line(filepath, self._REFERENCE_RE, "reference")[1]

    def filename(self, filepath):
        """Return the name to file the PDF under: ``<invoice reference>.pdf``."""
        return f"{self.tx_ref(filepath)}.pdf"

    def date(self, filepath):
        """Return the invoice date parsed from the "Invoice date DD-MM-YYYY" line.

        Raises:
            ValueError: If the text has no "Invoice date" line.
        """
        found = self._find_line(filepath, self._DATE_RE, "date")
        day, month, year = found.groups()
        # ``date`` below is the module-level ``datetime.date``: names bound in
        # the class body (such as this method) are not visible inside methods.
        return date(int(year), int(month), int(day))

    def extract(self, filepath, existing):
        """Build the beancount entries for one Mollie invoice.

        Args:
            filepath: Path of the invoice PDF.
            existing: Previously imported entries; not used by this importer.

        Returns:
            A two-element list ``[transaction, document]``. The transaction
            posts the invoice total (EUR) to the expense account and the
            negated total to ``Passiva:RevBank``.

        Raises:
            ValueError: If the total, reference or date cannot be found in
                the invoice text (checked in that order).
        """
        total = D(self._find_line(filepath, self._TOTAL_RE, "total")[1])

        contra_account = "Passiva:RevBank"
        expense_account = self.account(filepath)
        document_name = self.filename(filepath)
        # Named ``invoice_date`` so the imported ``date`` class is not shadowed.
        invoice_date = self.date(filepath)
        reference = self.tx_ref(filepath)
        units = amount.Amount(total, "EUR")

        document = Document(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            account=expense_account,
            filename=f"docs/Uitgaven/Bankkosten/{invoice_date}.{document_name}",
            tags=set(),
            links={reference},
        )
        transaction = Transaction(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            flag=flags.FLAG_OKAY,
            payee="Mollie B.V.",
            narration="iDeal transactiekosten",
            tags=set(),
            links={reference},
            postings=[
                Posting(expense_account, units, None, None, None, None),
                Posting(contra_account, -units, None, None, None, None),
            ],
        )
        return [transaction, document]