131 lines
4.2 KiB
Python
131 lines
4.2 KiB
Python
from pypdf import PdfReader
|
|
from beangulp import mimetypes, Importer
|
|
from beangulp.cache import cache
|
|
import re
|
|
from datetime import date
|
|
from beancount.core.data import Transaction, Posting, Document
|
|
from beancount.core import flags
|
|
from beancount.core import data
|
|
from beancount.core import amount
|
|
from beancount.core.number import D
|
|
|
|
|
|
@cache
def pdf_to_text(filename):
    """Return the extracted text of the first page of a PDF file.

    Only the first page (index 0) is read; later pages are ignored.
    The function is wrapped by the ``cache`` decorator imported from
    ``beangulp.cache``.
    """
    first_page = PdfReader(filename).pages[0]
    return first_page.extract_text()
|
|
|
|
|
|
class MollieInvoiceImporter(Importer):
    """Importer for PDF invoices that mention Mollie B.V.

    Every field is parsed from the text of the invoice's first page as
    returned by ``pdf_to_text``, one line at a time.
    """

    def identify(self, filepath):
        """Return True if the file is a PDF with a line starting with "Mollie B.V."."""
        guessed_type, _ = mimetypes.guess_type(filepath)
        if guessed_type != "application/pdf":
            return False

        for text_line in pdf_to_text(filepath).split("\n"):
            if text_line.startswith("Mollie B.V."):
                return True
        return False

    def account(self, filepath):
        """Return the account these invoices are filed under."""
        return "Uitgaven:Bankkosten"

    def tx_ref(self, filepath):
        """Return the invoice reference (the "MOL-..." part of the reference line).

        Raises:
            Exception: if no line of the page matches the reference pattern.
        """
        reference_pattern = re.compile(r"^Invoice reference (MOL-.+)$")
        for text_line in pdf_to_text(filepath).split("\n"):
            found = reference_pattern.search(text_line)
            if found is not None:
                return found.group(1)
        raise Exception("Mollie invoice reference not found")

    def filename(self, filepath):
        """Return the document file name: the invoice reference plus ".pdf"."""
        reference = self.tx_ref(filepath)
        return f"{reference}.pdf"

    def date(self, filepath):
        """Return the invoice date parsed from an "Invoice date DD-MM-YYYY" line.

        Raises:
            Exception: if no line of the page matches the date pattern.
        """
        date_pattern = re.compile(r"^Invoice date (\d{2})-(\d{2})-(\d{4})$")
        for text_line in pdf_to_text(filepath).split("\n"):
            found = date_pattern.search(text_line)
            if found is not None:
                day, month, year = (int(part) for part in found.groups())
                return date(year, month, day)
        raise Exception("Mollie invoice date not found")

    def extract(self, filepath, existing):
        """Build the entries for one invoice.

        Returns a two-element list: a transaction that posts the invoice
        total (in EUR) to the importer's account against
        "Passiva:RevBank", followed by a document entry. Both entries
        carry the invoice reference as a link.

        Raises:
            Exception: if the total, the reference or the date cannot be
                found in the page text.
        """
        total_pattern = re.compile(r"^Total including VAT ([\d.]+) EUR$")
        total = None
        for text_line in pdf_to_text(filepath).split("\n"):
            found = total_pattern.search(text_line)
            if found is not None:
                total = D(found.group(1))
                break
        if total is None:
            raise Exception("Mollie invoice total not found")

        doc_name = self.filename(filepath)
        invoice_date = self.date(filepath)
        reference = self.tx_ref(filepath)

        contra_account = "Passiva:RevBank"
        units = amount.Amount(total, "EUR")

        document = Document(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            account=self.account(filepath),
            filename=f"docs/Uitgaven/Bankkosten/{invoice_date}.{doc_name}",
            tags=set(),
            links={reference},
        )

        # The two postings are built in the same order as they appear in
        # the resulting transaction: expense first, contra second.
        expense_posting = Posting(
            account=self.account(filepath),
            units=units,
            cost=None,
            price=None,
            flag=None,
            meta=None,
        )
        contra_posting = Posting(
            account=contra_account,
            units=-units,
            cost=None,
            price=None,
            flag=None,
            meta=None,
        )
        transaction = Transaction(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            flag=flags.FLAG_OKAY,
            payee="Mollie B.V.",
            narration="iDeal transactiekosten",
            tags=set(),
            links={reference},
            postings=[expense_posting, contra_posting],
        )
        return [transaction, document]
|
|
|
|
|
|
class StatiegeldImporter(Importer):
    """Importer for PDF invoices that mention Statiegeld Nederland.

    Every field is parsed from the text of the invoice's first page as
    returned by ``pdf_to_text``, one line at a time. Only a document
    entry is produced; no transaction is generated.
    """

    def identify(self, filepath):
        """Return True if the file is a PDF with a line starting with "Statiegeld Nederland"."""
        mimetype, encoding = mimetypes.guess_type(filepath)
        if mimetype != "application/pdf":
            return False

        lines = pdf_to_text(filepath).split("\n")
        return any(line.startswith("Statiegeld Nederland") for line in lines)

    def account(self, filepath):
        """Return the account these invoices are filed under."""
        return "Inkomsten:Statiegeld"

    def tx_ref(self, filepath):
        """Return the invoice reference as "SNL-" plus the "RP<digits>" invoice number.

        Raises:
            Exception: if no "Factuurnr : RP<digits>" line is found.
        """
        lines = pdf_to_text(filepath).split("\n")
        for line in lines:
            if m := re.search(r"^Factuurnr : (RP\d+)$", line):
                return f"SNL-{m[1]}"
        # The message names this importer so a failure is not mistaken
        # for a problem with a Mollie invoice.
        raise Exception("Statiegeld invoice reference not found")

    def filename(self, filepath):
        """Return the document file name: the invoice reference plus ".pdf"."""
        return f"{self.tx_ref(filepath)}.pdf"

    def date(self, filepath):
        """Return the invoice date parsed from a "Datum : DD/MM/YYYY" line.

        Raises:
            Exception: if no line of the page matches the date pattern.
        """
        lines = pdf_to_text(filepath).split("\n")
        for line in lines:
            if m := re.search(r"^Datum : (\d{2})/(\d{2})/(\d{4})$", line):
                return date(int(m[3]), int(m[2]), int(m[1]))
        raise Exception("Date not found")

    def extract(self, filepath, existing):
        """Return a one-element list holding the document entry for the invoice.

        The document is linked with the invoice reference and filed under
        "docs/Inkomsten/Statiegeld/" using the invoice date and file name.

        Raises:
            Exception: if the reference or the date cannot be found in
                the page text.
        """
        name = self.filename(filepath)
        # Named invoice_date so the local does not shadow the imported
        # ``date`` class.
        invoice_date = self.date(filepath)
        link = self.tx_ref(filepath)

        doc = Document(
            meta=data.new_metadata(filepath, 0),
            date=invoice_date,
            account=self.account(filepath),
            filename=f"docs/Inkomsten/Statiegeld/{invoice_date}.{name}",
            tags=set(),
            links={link},
        )
        return [doc]
|