Initial commit
This commit is contained in:
commit
3511dd75ac
5 changed files with 106 additions and 0 deletions
37
inflatinator/scrapers.py
Normal file
37
inflatinator/scrapers.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
from decimal import Decimal
|
||||
from pyquery import PyQuery as pq
|
||||
import json
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
|
||||
def get(url):
|
||||
compl = subprocess.run(['links', '-source', url], capture_output=True)
|
||||
return compl.stdout
|
||||
|
||||
|
||||
def ah_get_by_sku(ah_sku):
|
||||
assert re.match('^wi\d+$', ah_sku)
|
||||
|
||||
html_src = get(f'https://www.ah.nl/producten/product/{ah_sku}')
|
||||
doc = pq(html_src)
|
||||
|
||||
ld_jsons = doc('script[type="application/ld+json"]')
|
||||
for j in ld_jsons:
|
||||
schema = json.loads(j.text)
|
||||
if schema['@type'] == 'Product' and schema['sku'] == ah_sku:
|
||||
break
|
||||
else:
|
||||
raise Exception(f'ah.nl returned no JSON metadata for SKU {ah_sku}')
|
||||
|
||||
name = schema['name']
|
||||
ean = schema['gtin13']
|
||||
sku = schema['sku']
|
||||
price = Decimal(schema['offers']['price'])
|
||||
|
||||
return {
|
||||
'name': name,
|
||||
'price': price,
|
||||
'ean': ean,
|
||||
'sku': sku,
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue