Initial commit

This commit is contained in:
polyfloyd 2024-01-05 16:18:09 +01:00
commit 3511dd75ac
5 changed files with 106 additions and 0 deletions

37
inflatinator/scrapers.py Normal file
View file

@ -0,0 +1,37 @@
from decimal import Decimal
from pyquery import PyQuery as pq
import json
import re
import subprocess
def get(url):
    """Fetch the raw source of *url* using the ``links`` command-line browser.

    Args:
        url: Address handed to ``links -source``.

    Returns:
        The bytes that ``links`` wrote to its standard output.

    Raises:
        subprocess.CalledProcessError: If ``links`` exits with a non-zero
            status.
    """
    # check=True makes a failed fetch surface here, instead of handing empty
    # or partial output to the caller where it would show up as an unrelated
    # parsing error.
    compl = subprocess.run(
        ['links', '-source', url],
        capture_output=True,
        check=True,
    )
    return compl.stdout
def ah_get_by_sku(ah_sku):
    """Look up a product on ah.nl by its SKU.

    Downloads the product page for *ah_sku* and reads the product details
    from the JSON-LD (``application/ld+json``) metadata embedded in it.

    Args:
        ah_sku: SKU string consisting of ``wi`` followed by digits.

    Returns:
        A dict with the keys ``name``, ``price`` (a ``Decimal``), ``ean``
        and ``sku``.

    Raises:
        ValueError: If *ah_sku* is not of the form ``wi<digits>``.
        Exception: If the page carries no Product metadata for *ah_sku*.
    """
    # Validate with a real exception: ``assert`` is stripped under ``-O``.
    # ``fullmatch`` also rejects a trailing newline, which ``$`` would accept
    # and which would otherwise end up in the request URL.
    if not re.fullmatch(r'wi\d+', ah_sku):
        raise ValueError(f'invalid ah.nl SKU: {ah_sku!r}')

    html_src = get(f'https://www.ah.nl/producten/product/{ah_sku}')
    doc = pq(html_src)

    product = None
    for script in doc('script[type="application/ld+json"]'):
        # An empty <script> element has no text to parse.
        if not script.text:
            continue
        try:
            schema = json.loads(script.text)
        except json.JSONDecodeError:
            # A malformed metadata block must not prevent finding the
            # Product block elsewhere on the page.
            continue
        # JSON-LD may also be a list or lack these keys; only a dict that
        # describes exactly this product is of interest.
        if not isinstance(schema, dict):
            continue
        if schema.get('@type') == 'Product' and schema.get('sku') == ah_sku:
            product = schema
            break

    if product is None:
        raise Exception(f'ah.nl returned no JSON metadata for SKU {ah_sku}')

    # Go through str() so that a JSON number such as 1.99 becomes
    # Decimal('1.99') rather than the float's binary approximation.
    price = Decimal(str(product['offers']['price']))
    return {
        'name': product['name'],
        'price': price,
        'ean': product['gtin13'],
        'sku': product['sku'],
    }