import re
from datetime import date

from beancount.core import amount, data, flags
from beancount.core.data import Document, Posting, Transaction
from beancount.core.number import D
from beangulp import Importer, mimetypes
from pypdf import PdfReader


def pdf_lines(filename):
    """Return the text of a PDF as one flat list of lines.

    The text of every page is extracted with pypdf and split on newlines;
    lines of consecutive pages are concatenated in page order.

    NOTE: the PDF is re-read and re-parsed on every call. The importers
    below call this helper once per looked-up field.
    """
    reader = PdfReader(filename)
    return [
        line
        for page in reader.pages
        for line in page.extract_text().split("\n")
    ]


class MollieInvoiceImporter(Importer):
    """Importer for PDF invoices that contain a line starting with "Mollie B.V."."""

    def identify(self, filepath):
        """Return True if *filepath* is a PDF with a line starting "Mollie B.V."."""
        mimetype, _ = mimetypes.guess_type(filepath)
        if mimetype != "application/pdf":
            return False
        return any(line.startswith("Mollie B.V.") for line in pdf_lines(filepath))

    def account(self, filepath):
        """Return the account the invoice is booked and filed under."""
        return "Uitgaven:Bankkosten"

    def tx_ref(self, filepath):
        """Return the invoice reference (``MOL-...``) found in the PDF.

        Raises:
            Exception: if no "Invoice reference MOL-..." line is present.
        """
        for line in pdf_lines(filepath):
            if m := re.search(r"^Invoice reference (MOL-.+)$", line):
                return m[1]
        raise Exception("Mollie invoice reference not found")

    def filename(self, filepath):
        """Return the file name for the document: ``<invoice reference>.pdf``."""
        return f"{self.tx_ref(filepath)}.pdf"

    def date(self, filepath):
        """Return the invoice date parsed from an "Invoice date DD-MM-YYYY" line.

        Raises:
            Exception: if no such line is present.
        """
        for line in pdf_lines(filepath):
            if m := re.search(r"^Invoice date (\d{2})-(\d{2})-(\d{4})$", line):
                # Groups are day, month, year; ``date`` here is datetime.date
                # (method names are not visible from inside method bodies).
                return date(int(m[3]), int(m[2]), int(m[1]))
        raise Exception("Mollie invoice date not found")

    def extract(self, filepath, existing):
        """Build the ledger entries for one invoice.

        Returns a list ``[transaction, document]``: a two-posting transaction
        that books the "Total including VAT" amount (EUR) on ``account()``
        against "Passiva:RevBank", and a Document directive for the PDF.
        Both carry the invoice reference as a link.

        Raises:
            Exception: if the total, reference or date line is missing.
        """
        for line in pdf_lines(filepath):
            if m := re.search(r"^Total including VAT ([\d.]+) EUR$", line):
                total = D(m[1])
                break
        else:
            # Loop finished without ``break``: no total line in the PDF.
            raise Exception("Mollie invoice total not found")

        contra = "Passiva:RevBank"
        name = self.filename(filepath)
        date = self.date(filepath)
        link = self.tx_ref(filepath)
        units = amount.Amount(total, "EUR")

        doc = Document(
            meta=data.new_metadata(filepath, 0),
            date=date,
            account=self.account(filepath),
            filename=f"docs/Uitgaven/Bankkosten/{date}.{name}",
            tags=set(),
            links={link},
        )
        tx = Transaction(
            meta=data.new_metadata(filepath, 0),
            date=date,
            flag=flags.FLAG_OKAY,
            payee="Mollie B.V.",
            narration="iDeal transactiekosten",
            tags=set(),
            links={link},
            postings=[
                Posting(self.account(filepath), units, None, None, None, None),
                # Contra posting carries the negated amount so the
                # transaction balances.
                Posting(contra, -units, None, None, None, None),
            ],
        )
        return [tx, doc]


class StatiegeldImporter(Importer):
    """Importer for PDFs that contain a line starting with "Statiegeld Nederland"."""

    def identify(self, filepath):
        """Return True if *filepath* is a PDF with a "Statiegeld Nederland" line."""
        mimetype, _ = mimetypes.guess_type(filepath)
        if mimetype != "application/pdf":
            return False
        return any(
            line.startswith("Statiegeld Nederland") for line in pdf_lines(filepath)
        )

    def account(self, filepath):
        """Return the account the document is filed under."""
        return "Inkomsten:Statiegeld"

    def tx_ref(self, filepath):
        """Return ``SNL-<number>`` built from the "Factuurnr : RP..." line.

        Raises:
            Exception: if no such line is present.
        """
        for line in pdf_lines(filepath):
            if m := re.search(r"^Factuurnr : (RP\d+)$", line):
                return f"SNL-{m[1]}"
        # Message previously said "Mollie ..." (copy-paste from the Mollie
        # importer), which misreported which importer had failed.
        raise Exception("Statiegeld invoice reference not found")

    def filename(self, filepath):
        """Return the file name for the document: ``<reference>.pdf``."""
        return f"{self.tx_ref(filepath)}.pdf"

    def date(self, filepath):
        """Return the date parsed from a "Datum : DD/MM/YYYY" line.

        Raises:
            Exception: if no such line is present.
        """
        for line in pdf_lines(filepath):
            if m := re.search(r"^Datum : (\d{2})/(\d{2})/(\d{4})$", line):
                # Groups are day, month, year.
                return date(int(m[3]), int(m[2]), int(m[1]))
        raise Exception("Date not found")

    def extract(self, filepath, existing):
        """Return a one-element list with the Document directive for the PDF.

        No transaction is produced; the document is linked by its reference.

        Raises:
            Exception: if the reference or date line is missing.
        """
        name = self.filename(filepath)
        date = self.date(filepath)
        link = self.tx_ref(filepath)
        doc = Document(
            meta=data.new_metadata(filepath, 0),
            date=date,
            account=self.account(filepath),
            filename=f"docs/Inkomsten/Statiegeld/{date}.{name}",
            tags=set(),
            links={link},
        )
        return [doc]