Initial commit

This commit is contained in:
polyfloyd 2024-01-05 16:18:09 +01:00
commit 3511dd75ac
5 changed files with 106 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
layout python

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
__pycache__
/.direnv

16
inflatinator/__main__.py Normal file
View file

@ -0,0 +1,16 @@
import revbank
import sys
def main(product_file):
    """Rewrite *product_file* in place with refreshed product prices.

    The whole file is read into memory, passed through
    ``revbank.update_product_pricings`` and the result is written back to
    the same path.
    """
    with open(product_file, 'r') as source:
        original_text = source.read()

    # The file is only reopened for writing once the update has succeeded.
    updated_text = revbank.update_product_pricings(original_text)

    with open(product_file, 'w') as target:
        target.write(updated_text)
if __name__ == '__main__':
    # The product file path is the first command-line argument. Exit with a
    # usage message (status 1) instead of an IndexError traceback when it is
    # missing; any additional arguments are ignored.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} PRODUCT_FILE')
    main(sys.argv[1])

50
inflatinator/revbank.py Normal file
View file

@ -0,0 +1,50 @@
import re
import scrapers
from decimal import Decimal, ROUND_UP
# Multiplier applied to the vendor price when computing our unit price
# (1.3 means a 30% margin on top of the vendor price).
our_margin = Decimal('1.3')
def find_product_details(vendor_and_sku):
    """Look up the details of a product identified as ``<vendor>:<sku>``.

    Args:
        vendor_and_sku: Vendor prefix and vendor-specific SKU separated by a
            colon, e.g. ``ah:wi1234``. Only the first colon separates the two
            parts; any further colons are treated as part of the SKU.

    Returns:
        The value returned by the vendor's scraper; for the ``ah`` vendor
        this is the result of ``scrapers.ah_get_by_sku``.

    Raises:
        ValueError: If the argument contains no colon.
        Exception: If the vendor prefix is not recognised.
    """
    # partition() splits on the first colon only, so a SKU that itself
    # contains a colon no longer breaks the two-name unpacking.
    vendor, separator, sku = vendor_and_sku.partition(':')
    if not separator:
        raise ValueError(f'expected "<vendor>:<sku>", got: {vendor_and_sku!r}')
    if vendor == 'ah':
        return scrapers.ah_get_by_sku(sku)
    raise Exception(f'unknown vendor: {vendor}')
def update_product_pricings(src):
    """Return *src* with every vendor-tracked product line re-priced.

    A line is updated when it ends in a marker comment of the form
    ``# <vendor>:<sku> <units>x`` (for example ``# ah:wi1234 6x``). Such a
    line is replaced by a tab-separated ``aliases``, ``unit price`` and
    ``name`` followed by the same marker. The details come from
    ``find_product_details``. Every other line is copied through unchanged.

    Args:
        src: Full text of the product file.

    Returns:
        The updated text, with lines joined by a newline.

    Raises:
        ZeroDivisionError: If a marker declares zero units per package.
        Exception: Anything ``find_product_details`` raises for the SKU.
    """
    find_updatable = re.compile(r'#\s*(?P<sku>\S+)\s+(?P<units>\d+)x$')
    find_aliases = re.compile(r'^(?P<aliases>\S+)')

    lines_out = []
    for line in src.split('\n'):
        m = find_updatable.search(line)
        if not m:
            lines_out.append(line)
            continue

        units = int(m['units'])
        if units == 0:
            # Reject this before fetching anything: dividing the price by
            # zero units would otherwise fail later with an error that does
            # not say which line is at fault.
            raise ZeroDivisionError(
                f'product line declares 0 units per package: {line!r}'
            )

        d = find_product_details(m['sku'])

        # Keep the aliases already present on an active product line. A line
        # starting with '#' is a comment and has no aliases to keep.
        product_aliases = set()
        if not line.startswith('#'):
            product_aliases = set(find_aliases.search(line)['aliases'].split(','))
        product_aliases.add(d['ean'])
        aliases = ','.join(sorted(product_aliases))

        price = d['price']
        # Apply a 30% margin and divide by the number of units per sold packaging.
        unit_price = price * our_margin / units
        # Round up to 5ct: scale so one unit equals 5ct, round up to a whole
        # number, then scale back.
        unit_price = (unit_price * 20).quantize(Decimal('1'), rounding=ROUND_UP) / 20

        lines_out.append(f'{aliases}\t{unit_price:.2f}\t{d["name"]} # {m["sku"]} {units}x')
    return '\n'.join(lines_out)

37
inflatinator/scrapers.py Normal file
View file

@ -0,0 +1,37 @@
from decimal import Decimal
from pyquery import PyQuery as pq
import json
import re
import subprocess
def get(url):
    """Fetch *url* with the ``links`` command and return the page source.

    Args:
        url: Address passed to ``links -source``.

    Returns:
        The bytes that ``links`` wrote to standard output.

    Raises:
        subprocess.CalledProcessError: If ``links`` exits with a non-zero
            status.
    """
    # check=True reports a failed download here, instead of handing an empty
    # or partial document to the caller's parser.
    compl = subprocess.run(
        ['links', '-source', url],
        capture_output=True,
        check=True,
    )
    return compl.stdout
def ah_get_by_sku(ah_sku):
    """Fetch name, price, EAN and SKU of an ah.nl product from its web page.

    The product page is downloaded with ``get`` and searched for a
    ``<script type="application/ld+json">`` block that describes a
    ``Product`` whose ``sku`` equals *ah_sku*.

    Args:
        ah_sku: SKU consisting of ``wi`` followed by one or more digits.

    Returns:
        A dict with the keys ``name``, ``price`` (a ``Decimal``), ``ean``
        and ``sku``.

    Raises:
        ValueError: If *ah_sku* does not have the expected form.
        Exception: If the page contains no matching product metadata.
    """
    # The SKU is interpolated into a URL, so validate it with a real
    # exception: an ``assert`` would be stripped when running with -O.
    if not re.fullmatch(r'wi\d+', ah_sku):
        raise ValueError(f'invalid ah.nl SKU: {ah_sku!r}')

    html_src = get(f'https://www.ah.nl/producten/product/{ah_sku}')
    doc = pq(html_src)
    ld_jsons = doc('script[type="application/ld+json"]')
    for j in ld_jsons:
        if not j.text:
            # Empty script tag: nothing to parse.
            continue
        schema = json.loads(j.text)
        # Skip JSON-LD documents that are not objects or lack the expected
        # keys, rather than failing on unrelated metadata.
        if not isinstance(schema, dict):
            continue
        if schema.get('@type') == 'Product' and schema.get('sku') == ah_sku:
            break
    else:
        raise Exception(f'ah.nl returned no JSON metadata for SKU {ah_sku}')

    name = schema['name']
    ean = schema['gtin13']
    sku = schema['sku']
    # Convert via str() so a price delivered as a JSON number keeps its
    # decimal value, not the binary expansion of the float.
    price = Decimal(str(schema['offers']['price']))
    return {
        'name': name,
        'price': price,
        'ean': ean,
        'sku': sku,
    }