Annotate products that could not be found by scrapers

This commit is contained in:
polyfloyd 2025-04-21 11:29:27 +02:00
parent d69f35cfcc
commit 650aef9794
3 changed files with 32 additions and 3 deletions

View file

@ -78,6 +78,11 @@ def update_product_pricings(src):
logging.debug('no auto update: "%s"', product.description) logging.debug('no auto update: "%s"', product.description)
lines_out.append(line) lines_out.append(line)
continue continue
except scrapers.ProductNotFoundError:
logging.warn('not found "%s"', product.description)
product.metadata['err'] = 'not_found'
lines_out.append(product.format_line())
continue
except Exception as err: except Exception as err:
logging.error('did not update "%s": %s', product.description, err) logging.error('did not update "%s": %s', product.description, err)
lines_out.append(line) lines_out.append(line)
@ -92,6 +97,9 @@ def update_product_pricings(src):
previous_price = product.price previous_price = product.price
product.price = (unit_price * 20).quantize(Decimal('1'), rounding=ROUND_UP) / 20 product.price = (unit_price * 20).quantize(Decimal('1'), rounding=ROUND_UP) / 20
if 'err' in product.metadata:
del product.metadata['err']
lines_out.append(product.format_line()) lines_out.append(product.format_line())
logging.debug(f'Found "{prod_info.name}", buy €{prod_info.price/prod_info.units:.2f}, sell €{product.price:.2f}') logging.debug(f'Found "{prod_info.name}", buy €{prod_info.price/prod_info.units:.2f}, sell €{product.price:.2f}')

View file

@ -14,6 +14,11 @@ from typing import List
vat = Decimal('1.09') vat = Decimal('1.09')
class ProductNotFoundError(Exception):
    """Raised when a scraper cannot find the requested product.

    Args:
        message: Human-readable description of the failure. Defaults to the
            generic ``'product not found'`` so existing zero-argument
            ``ProductNotFoundError()`` call sites keep working; scrapers may
            pass a more specific text (e.g. including the GTIN).
    """

    def __init__(self, message='product not found'):
        super().__init__(message)
@dataclass @dataclass
class Product: class Product:
name: str name: str
@ -35,7 +40,12 @@ def ah_get_by_gtin(gtin13):
if not _ah: if not _ah:
_ah = AHConnector() _ah = AHConnector()
try:
ah_prod = _ah.get_product_by_barcode(gtin13) ah_prod = _ah.get_product_by_barcode(gtin13)
except requests.exceptions.HTTPError as err:
if err.response.status_code == 404:
raise ProductNotFoundError()
raise err
units_description = ah_prod['salesUnitSize'] units_description = ah_prod['salesUnitSize']
units = 1 units = 1
@ -88,7 +98,7 @@ def sligro_get_by_gtin(gtin13):
if 'products' in body: if 'products' in body:
break break
else: else:
raise Exception(f'sligro: {gtin13} not found') raise ProductNotFoundError()
product = body['products'][0] product = body['products'][0]
sku = product["code"] sku = product["code"]

View file

@ -1,4 +1,5 @@
from scrapers import ah_get_by_gtin, sligro_get_by_gtin, parse_content_description, Product from scrapers import ah_get_by_gtin, sligro_get_by_gtin, parse_content_description, Product, ProductNotFoundError
import pytest
def test_scrape_ah(): def test_scrape_ah():
@ -11,6 +12,11 @@ def test_scrape_ah():
assert prod.aliases == [] assert prod.aliases == []
def test_scrape_ah_not_found():
    """ah_get_by_gtin must raise ProductNotFoundError for an unknown GTIN."""
    # The all-nines GTIN is assumed not to exist in the AH catalogue.
    unknown_gtin = '9999999999999'
    with pytest.raises(ProductNotFoundError):
        ah_get_by_gtin(unknown_gtin)
def test_scrape_sligro(): def test_scrape_sligro():
# Cola zero sugar # Cola zero sugar
prod = sligro_get_by_gtin('5000112659184') prod = sligro_get_by_gtin('5000112659184')
@ -21,6 +27,11 @@ def test_scrape_sligro():
assert prod.aliases == ['5000112658873'] assert prod.aliases == ['5000112658873']
def test_scrape_sligro_not_found():
    """sligro_get_by_gtin must raise ProductNotFoundError for an unknown GTIN."""
    # The all-nines GTIN is assumed not to exist in the Sligro catalogue.
    unknown_gtin = '9999999999999'
    with pytest.raises(ProductNotFoundError):
        sligro_get_by_gtin(unknown_gtin)
def test_parse_content_description(): def test_parse_content_description():
assert parse_content_description('40 stuks x 22,5 gram') == (40, '22,5 gram') assert parse_content_description('40 stuks x 22,5 gram') == (40, '22,5 gram')
assert parse_content_description('4 multipacks x 6 blikjes x 33 cl') == (24, '33 cl') assert parse_content_description('4 multipacks x 6 blikjes x 33 cl') == (24, '33 cl')