From 3511dd75ac6911c03255f6c6c099811781002698 Mon Sep 17 00:00:00 2001
From: polyfloyd
Date: Fri, 5 Jan 2024 16:18:09 +0100
Subject: [PATCH] Initial commit

---
Note: the blob ids on the "index" lines of the three Python files predate
the fixes folded into this patch and no longer match the added content.

 .envrc                   |  1 +
 .gitignore               |  2 ++
 inflatinator/__main__.py | 25 +++++++++++++
 inflatinator/revbank.py  | 76 ++++++++++++++++++++++++++++++++++++++++
 inflatinator/scrapers.py | 60 +++++++++++++++++++++++++++++++
 5 files changed, 164 insertions(+)
 create mode 100644 .envrc
 create mode 100644 .gitignore
 create mode 100644 inflatinator/__main__.py
 create mode 100644 inflatinator/revbank.py
 create mode 100644 inflatinator/scrapers.py

diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..175de89
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+layout python
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..533bcab
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+/.direnv
diff --git a/inflatinator/__main__.py b/inflatinator/__main__.py
new file mode 100644
index 0000000..f8f7083
--- /dev/null
+++ b/inflatinator/__main__.py
@@ -0,0 +1,25 @@
+"""Command-line entry point: refresh the prices in a product file."""
+
+import revbank
+import sys
+
+
+def main(product_file):
+    """Rewrite ``product_file`` in place with up-to-date prices.
+
+    The file is read and converted in full before it is reopened for writing,
+    so a failed product lookup leaves the original contents untouched.
+    """
+    with open(product_file, 'r') as fd:
+        src = fd.read()
+
+    new_src = revbank.update_product_pricings(src)
+
+    with open(product_file, 'w') as fd:
+        fd.write(new_src)
+
+
+if __name__ == '__main__':
+    if len(sys.argv) < 2:
+        sys.exit(f'usage: {sys.argv[0]} PRODUCT_FILE')
+    main(sys.argv[1])
diff --git a/inflatinator/revbank.py b/inflatinator/revbank.py
new file mode 100644
index 0000000..8e3e38d
--- /dev/null
+++ b/inflatinator/revbank.py
@@ -0,0 +1,76 @@
+"""Regenerate product lines from the update markers in a product file."""
+
+import re
+import scrapers
+from decimal import Decimal, ROUND_UP
+
+
+# Multiplier applied to the vendor price (a 30% margin).
+our_margin = Decimal('1.3')
+
+# A trailing "# <vendor>:<sku> <units>x" marker flags a line for updating.
+_FIND_UPDATABLE = re.compile(r'#\s*(?P<sku>\S+)\s+(?P<units>\d+)x$')
+# The first whitespace-delimited field holds the comma-separated aliases.
+_FIND_ALIASES = re.compile(r'^(?P<aliases>\S+)')
+
+
+def find_product_details(vendor_and_sku):
+    """Look up a product given as ``<vendor>:<sku>``, e.g. ``ah:wi1234``.
+
+    Returns whatever the vendor's scraper returns; ``update_product_pricings``
+    reads the ``name``, ``price`` and ``ean`` keys from it.
+
+    Raises ``ValueError`` when the vendor prefix is missing and ``Exception``
+    when the vendor is not supported.
+    """
+    # partition() splits on the first colon only, so a stray second colon ends
+    # up in the SKU (and is rejected by the scraper) instead of breaking the
+    # unpacking the way split(':', 2) did.
+    vendor, sep, sku = vendor_and_sku.partition(':')
+    if not sep:
+        raise ValueError(f'expected <vendor>:<sku>, got: {vendor_and_sku}')
+
+    if vendor == 'ah':
+        return scrapers.ah_get_by_sku(sku)
+
+    raise Exception(f'unknown vendor: {vendor}')
+
+
+def update_product_pricings(src):
+    """Return ``src`` with every marked line replaced by a fresh product line.
+
+    Lines that do not end in an update marker are passed through unchanged.
+    A marked line is rewritten as ``aliases<TAB>price<TAB>name # marker``:
+    the existing aliases are kept (unless the line starts with ``#``), the
+    scraped EAN is added to them and the price is recalculated.
+    """
+    lines_out = []
+
+    for line in src.split('\n'):
+        m = _FIND_UPDATABLE.search(line)
+        if not m:
+            lines_out.append(line)
+            continue
+
+        d = find_product_details(m['sku'])
+
+        product_aliases = set()
+        if not line.startswith('#'):
+            # An indented line has no leading field to take aliases from.
+            alias_match = _FIND_ALIASES.search(line)
+            if alias_match:
+                product_aliases.update(alias_match['aliases'].split(','))
+        product_aliases.add(d['ean'])
+
+        aliases = ','.join(sorted(product_aliases))
+        units = int(m['units'])
+        price = d['price']
+
+        # Apply a 30% margin and divide by the number of units per sold packaging.
+        unit_price = price * our_margin / units
+        # Round up to 5ct.
+        unit_price = (unit_price * 20).quantize(Decimal('1'), rounding=ROUND_UP) / 20
+
+        lines_out.append(f'{aliases}\t{unit_price:.2f}\t{d["name"]} # {m["sku"]} {units}x')
+
+    return '\n'.join(lines_out)
diff --git a/inflatinator/scrapers.py b/inflatinator/scrapers.py
new file mode 100644
index 0000000..78c43ea
--- /dev/null
+++ b/inflatinator/scrapers.py
@@ -0,0 +1,60 @@
+"""Scrapers that fetch product details from vendor web shops."""
+
+from decimal import Decimal
+from pyquery import PyQuery as pq
+import json
+import re
+import subprocess
+
+
+def get(url):
+    """Run ``links -source`` on ``url`` and return its standard output (bytes).
+
+    Raises ``subprocess.CalledProcessError`` when ``links`` exits non-zero,
+    instead of silently returning whatever output there was.
+    """
+    compl = subprocess.run(['links', '-source', url], capture_output=True, check=True)
+    return compl.stdout
+
+
+def ah_get_by_sku(ah_sku):
+    """Look up a product on ah.nl by its SKU (``wi`` followed by digits).
+
+    Returns a dict with the ``name``, ``price`` (a ``Decimal``), ``ean`` and
+    ``sku`` taken from the JSON-LD ``Product`` metadata on the product page.
+
+    Raises ``ValueError`` for a malformed SKU and ``Exception`` when the page
+    carries no matching metadata.
+    """
+    # The SKU ends up in a URL, so validate it with a real check: an assert
+    # would be stripped under ``python -O``.
+    if not re.fullmatch(r'wi\d+', ah_sku):
+        raise ValueError(f'invalid ah.nl SKU: {ah_sku!r}')
+
+    html_src = get(f'https://www.ah.nl/producten/product/{ah_sku}')
+    doc = pq(html_src)
+
+    ld_jsons = doc('script[type="application/ld+json"]')
+    for j in ld_jsons:
+        schema = json.loads(j.text or 'null')
+        # Skip blocks that are not a JSON object (e.g. a list of entities).
+        if not isinstance(schema, dict):
+            continue
+        if schema.get('@type') == 'Product' and schema.get('sku') == ah_sku:
+            break
+    else:
+        raise Exception(f'ah.nl returned no JSON metadata for SKU {ah_sku}')
+
+    name = schema['name']
+    ean = schema['gtin13']
+    sku = schema['sku']
+    # Go through str() so that a JSON number is not converted from its binary
+    # float value, which could tip the round-up to the next 5ct.
+    price = Decimal(str(schema['offers']['price']))
+
+    return {
+        'name': name,
+        'price': price,
+        'ean': ean,
+        'sku': sku,
+    }