revbank-inflatinator/inflatinator/scrapers.py
2024-01-05 16:18:40 +01:00

37 lines
899 B
Python

from decimal import Decimal
from pyquery import PyQuery as pq
import json
import re
import subprocess
def get(url):
    """Fetch ``url`` with the ``links`` browser and return the page source.

    Runs ``links -source <url>`` as a subprocess and returns whatever it
    wrote to standard output, as bytes. Standard error is captured and
    discarded, and the exit status is not inspected.
    """
    command = ['links', '-source', url]
    finished = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    return finished.stdout
def ah_get_by_sku(ah_sku):
    """Look up a product on ah.nl by SKU and return its basic metadata.

    The product page is fetched through ``get`` and its embedded JSON-LD
    (``application/ld+json``) script blocks are searched for a ``Product``
    entry whose ``sku`` equals ``ah_sku``.

    Args:
        ah_sku: SKU string consisting of ``wi`` followed by digits.

    Returns:
        A dict with the keys ``'name'``, ``'price'`` (a ``Decimal``),
        ``'ean'`` (taken from the ``gtin13`` field) and ``'sku'``.

    Raises:
        ValueError: if ``ah_sku`` does not have the expected form.
        Exception: if the page contains no matching JSON-LD product block.
        KeyError: if the matching block lacks one of the expected fields.
    """
    # Explicit check instead of ``assert`` so validation survives ``-O``.
    # ``fullmatch`` is used because ``$`` would also accept a trailing
    # newline, which would then be interpolated into the URL below.
    if not re.fullmatch(r'wi\d+', ah_sku):
        raise ValueError(f'invalid ah.nl SKU: {ah_sku!r}')

    html_src = get(f'https://www.ah.nl/producten/product/{ah_sku}')
    doc = pq(html_src)

    for j in doc('script[type="application/ld+json"]'):
        if not j.text:
            # An empty script element cannot hold the product metadata.
            continue
        # parse_float=Decimal keeps a numeric price exactly as written in
        # the page; going through float would add binary rounding noise.
        schema = json.loads(j.text, parse_float=Decimal)
        # Skip JSON-LD blocks that are not objects or that describe
        # something else, rather than failing on a missing key.
        if (
            isinstance(schema, dict)
            and schema.get('@type') == 'Product'
            and schema.get('sku') == ah_sku
        ):
            break
    else:
        raise Exception(f'ah.nl returned no JSON metadata for SKU {ah_sku}')

    return {
        'name': schema['name'],
        'price': Decimal(schema['offers']['price']),
        'ean': schema['gtin13'],
        'sku': schema['sku'],
    }