"""End-to-end integration tests for the data pipeline. Tests the full flow: scraper output → normalization → product matching → DB storage → price tracking → shrinkflation detection → event publishing. Uses real test fixtures with an in-memory SQLite database, not mocks. """ import uuid from datetime import date from decimal import Decimal from unittest.mock import MagicMock import pytest from sqlalchemy import create_engine, select from sqlalchemy.orm import Session, sessionmaker from cartsnitch_common.constants import ( EventType, SizeUnit, StoreSlug, ) from cartsnitch_common.events import publish_event from cartsnitch_common.models import ( Base, NormalizedProduct, PriceHistory, Purchase, PurchaseItem, ShrinkflationEvent, Store, User, ) from cartsnitch_common.pipeline.matching import ProductMatcher from cartsnitch_common.pipeline.price_tracking import ( PriceDelta, get_price_trend, record_price_from_item, ) from cartsnitch_common.pipeline.receipt import normalize_receipt, parse_meijer_item from cartsnitch_common.pipeline.shrinkflation import detect_shrinkflation from cartsnitch_common.schemas.events import EventEnvelope from cartsnitch_common.schemas.purchase import PurchaseCreate # --------------------------------------------------------------------------- # Fixtures: realistic scraper output from Meijer # --------------------------------------------------------------------------- MEIJER_RECEIPT_FIXTURE = { "receiptId": "MJ-2026-03-15-00042", "date": "2026-03-15", "total": "47.82", "subtotal": "44.50", "taxAmount": "3.32", "totalSavings": "6.20", "items": [ { "description": " Meijer Whole Milk 1 Gallon ", "upcCode": "00041250010001", "quantity": 1, "unitPrice": "3.29", "extendedPrice": "3.29", "regularPrice": "3.49", "salePrice": "3.29", "category": "Dairy", }, { "name": "BARILLA SPAGHETTI 16 OZ", "upc": "076808280753", "qty": 2, "price": "1.69", "totalPrice": "3.38", "regularPrice": "1.89", "couponDiscount": "0.40", "department": "Pantry", }, { "description": "Meijer Lean Ground Beef 1 lb", "upcCode": "00041250022004", "quantity": 1, "unitPrice": "5.99", "extendedPrice": "5.99", "regularPrice": "6.49", "loyaltyDiscount": "0.50", "category": "Meat", }, { "description": "Cheerios Original 12 oz", "upcCode": "016000275645", "quantity": 1, "unitPrice": "4.49", "extendedPrice": "4.49", "regularPrice": "4.49", "category": "Snacks", }, { "description": "Fresh Bananas", "quantity": 1, "unitPrice": "0.69", "extendedPrice": "0.69", "category": "Produce", }, ], } MEIJER_RECEIPT_SECOND_VISIT = { "receiptId": "MJ-2026-03-18-00099", "date": "2026-03-18", "total": "12.47", "items": [ { "description": "Meijer Whole Milk 1 Gallon", "upcCode": "00041250010001", "quantity": 1, "unitPrice": "3.49", "extendedPrice": "3.49", "regularPrice": "3.49", "category": "Dairy", }, { "description": "BARILLA SPAGHETTI 16 OZ", "upc": "076808280753", "qty": 1, "price": "1.99", "totalPrice": "1.99", "regularPrice": "1.99", "department": "Pantry", }, { "description": "Cheerios Original 10.8 oz", "upcCode": "016000275645", "quantity": 1, "unitPrice": "4.49", "extendedPrice": "4.49", "regularPrice": "4.49", "category": "Snacks", }, ], } @pytest.fixture def e2e_engine(): """In-memory SQLite engine for E2E tests.""" eng = create_engine("sqlite:///:memory:") Base.metadata.create_all(eng) yield eng eng.dispose() @pytest.fixture def e2e_session(e2e_engine): """SQLAlchemy session with pre-seeded store and user.""" factory = sessionmaker(bind=e2e_engine) with factory() as sess: yield sess @pytest.fixture def store(e2e_session: Session) -> Store: """Seed a Meijer store.""" s = Store(id=uuid.uuid4(), name="Meijer", slug=StoreSlug.MEIJER) e2e_session.add(s) e2e_session.flush() return s @pytest.fixture def user(e2e_session: Session) -> User: """Seed a test user.""" u = User( id=uuid.uuid4(), email="tester@cartsnitch.com", hashed_password="hashed_test_password", display_name="Test User", ) e2e_session.add(u) e2e_session.flush() return u @pytest.fixture def redis_mock(): """A lightweight Redis mock that captures published messages.""" client = MagicMock() published: list[tuple[str, str]] = [] def _publish(channel: str, message: str) -> int: published.append((channel, message)) return 1 client.publish = MagicMock(side_effect=_publish) client._published = published return client # =========================================================================== # Test class: Full pipeline E2E — scraper → normalization → matching → storage # =========================================================================== class TestFullPipelineE2E: """Scraper output → normalize_receipt → ProductMatcher → DB storage.""" def test_normalize_meijer_receipt(self, user: User, store: Store): """Raw Meijer receipt normalizes into a valid PurchaseCreate.""" purchase = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) assert isinstance(purchase, PurchaseCreate) assert purchase.receipt_id == "MJ-2026-03-15-00042" assert purchase.purchase_date == date(2026, 3, 15) assert purchase.total == Decimal("47.82") assert purchase.subtotal == Decimal("44.50") assert purchase.tax == Decimal("3.32") assert purchase.savings_total == Decimal("6.20") assert len(purchase.items) == 5 assert purchase.raw_data == MEIJER_RECEIPT_FIXTURE def test_item_field_normalization(self, user: User, store: Store): """Items parse correctly regardless of field name variants.""" purchase = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) # Item using 'description' / 'upcCode' fields milk = purchase.items[0] assert milk.product_name_raw == "Meijer Whole Milk 1 Gallon" assert milk.upc == "41250010001" # leading zeros stripped assert milk.unit_price == Decimal("3.29") # Item using 'name' / 'upc' / 'qty' / 'price' / 'totalPrice' fields pasta = purchase.items[1] assert pasta.product_name_raw == "BARILLA SPAGHETTI 16 OZ" assert pasta.upc == "76808280753" assert pasta.quantity == Decimal("2") assert pasta.extended_price == Decimal("3.38") assert pasta.coupon_discount == Decimal("0.40") def test_upc_product_matching_and_storage(self, e2e_session: Session, user: User, store: Store): """Full flow: normalize → match → store in DB. UPC matching works E2E.""" purchase_schema = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) # Run product matching matcher = ProductMatcher(e2e_session, auto_create=True) outcomes = matcher.match_items(purchase_schema.items) assert len(outcomes) == 5 # First item has a UPC — auto_create makes a new product assert outcomes[0].created_new is True # Store the purchase in DB purchase_db = Purchase( id=uuid.uuid4(), user_id=user.id, store_id=store.id, receipt_id=purchase_schema.receipt_id, purchase_date=purchase_schema.purchase_date, total=purchase_schema.total, subtotal=purchase_schema.subtotal, tax=purchase_schema.tax, savings_total=purchase_schema.savings_total, raw_data=purchase_schema.raw_data, ) e2e_session.add(purchase_db) e2e_session.flush() # Store items linked to the purchase and matched products for _i, item_schema in enumerate(purchase_schema.items): item_db = PurchaseItem( id=uuid.uuid4(), purchase_id=purchase_db.id, product_name_raw=item_schema.product_name_raw, upc=item_schema.upc, quantity=item_schema.quantity, unit_price=item_schema.unit_price, extended_price=item_schema.extended_price, regular_price=item_schema.regular_price, sale_price=item_schema.sale_price, coupon_discount=item_schema.coupon_discount, loyalty_discount=item_schema.loyalty_discount, category_raw=item_schema.category_raw, ) e2e_session.add(item_db) e2e_session.flush() # Verify data persisted correctly stored_purchase = e2e_session.execute( select(Purchase).where(Purchase.receipt_id == "MJ-2026-03-15-00042") ).scalar_one() assert stored_purchase.total == Decimal("47.82") assert stored_purchase.user_id == user.id assert stored_purchase.store_id == store.id stored_items = ( e2e_session.execute( select(PurchaseItem).where(PurchaseItem.purchase_id == stored_purchase.id) ) .scalars() .all() ) assert len(stored_items) == 5 # Verify products were created in normalized_products table products = e2e_session.execute(select(NormalizedProduct)).scalars().all() assert len(products) == 5 # all 5 items auto-created products def test_second_visit_reuses_existing_products( self, e2e_session: Session, user: User, store: Store ): """On second receipt, products matched by UPC reuse existing records.""" # Ingest first receipt first = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) matcher = ProductMatcher(e2e_session, auto_create=True) matcher.match_items(first.items) products_after_first = e2e_session.execute(select(NormalizedProduct)).scalars().all() first_count = len(products_after_first) # Ingest second receipt — overlapping UPCs second = normalize_receipt( MEIJER_RECEIPT_SECOND_VISIT, user_id=str(user.id), store_id=str(store.id), ) second_outcomes = matcher.match_items(second.items) # Milk, pasta, cheerios should match existing by UPC assert second_outcomes[0].created_new is False # milk — UPC match assert second_outcomes[1].created_new is False # pasta — UPC match assert second_outcomes[2].created_new is False # cheerios — UPC match products_after_second = e2e_session.execute(select(NormalizedProduct)).scalars().all() assert len(products_after_second) == first_count # no new products created # =========================================================================== # Test class: Price tracking and shrinkflation detection E2E # =========================================================================== class TestPriceTrackingE2E: """Price recording from stored items and price delta detection.""" def test_price_recorded_from_ingested_receipt( self, e2e_session: Session, user: User, store: Store ): """Ingest receipt → match products → record prices → verify price history.""" purchase_schema = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) matcher = ProductMatcher(e2e_session, auto_create=True) outcomes = matcher.match_items(purchase_schema.items) # Record prices for each matched item price_entries = [] for i, item_schema in enumerate(purchase_schema.items): product = outcomes[i].match.product if outcomes[i].match else None if product is None: # Was auto-created — find the product directly products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if p.canonical_name == item_schema.product_name_raw: product = p break if product: entry, delta = record_price_from_item( e2e_session, product_id=product.id, store_id=store.id, observed_date=purchase_schema.purchase_date, regular_price=item_schema.regular_price or item_schema.unit_price, sale_price=item_schema.sale_price, ) price_entries.append((entry, delta)) # First ingestion — no deltas expected assert all(delta is None for _, delta in price_entries) # Verify price history stored all_prices = e2e_session.execute(select(PriceHistory)).scalars().all() assert len(all_prices) >= 4 # at least the items with regular_price def test_price_increase_detected_on_second_receipt( self, e2e_session: Session, user: User, store: Store ): """Second receipt with higher price triggers a PriceDelta.""" # Ingest first receipt first = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) matcher = ProductMatcher(e2e_session, auto_create=True) first_outcomes = matcher.match_items(first.items) # Record first prices for i, item_schema in enumerate(first.items): product = first_outcomes[i].match.product if first_outcomes[i].match else None if product is None: products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if p.canonical_name == item_schema.product_name_raw: product = p break if product: record_price_from_item( e2e_session, product_id=product.id, store_id=store.id, observed_date=first.purchase_date, regular_price=item_schema.regular_price or item_schema.unit_price, sale_price=item_schema.sale_price, ) # Ingest second receipt — pasta price went up ($1.89 → $1.99) second = normalize_receipt( MEIJER_RECEIPT_SECOND_VISIT, user_id=str(user.id), store_id=str(store.id), ) second_outcomes = matcher.match_items(second.items) # Record second prices and capture deltas deltas: list[PriceDelta] = [] for i, item_schema in enumerate(second.items): product = second_outcomes[i].match.product if second_outcomes[i].match else None if product is None: products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if p.canonical_name == item_schema.product_name_raw: product = p break if product: _, delta = record_price_from_item( e2e_session, product_id=product.id, store_id=store.id, observed_date=second.purchase_date, regular_price=item_schema.regular_price or item_schema.unit_price, sale_price=item_schema.sale_price, ) if delta: deltas.append(delta) # Milk went from $3.49 → $3.49 (no change); pasta from $1.89 → $1.99 (increase) price_increases = [d for d in deltas if d.is_increase] assert len(price_increases) >= 1 pasta_delta = next( (d for d in price_increases if d.old_price == Decimal("1.89")), None, ) assert pasta_delta is not None assert pasta_delta.new_price == Decimal("1.99") assert pasta_delta.change_amount == Decimal("0.10") assert pasta_delta.is_increase is True def test_price_trend_across_visits(self, e2e_session: Session, user: User, store: Store): """get_price_trend returns ordered history after multiple ingestions.""" # Create a product manually product = NormalizedProduct( id=uuid.uuid4(), canonical_name="Test Product", upc_variants=["1234567890"], ) e2e_session.add(product) e2e_session.flush() # Record 3 prices on different dates dates_prices = [ (date(2026, 3, 10), Decimal("2.99")), (date(2026, 3, 13), Decimal("3.19")), (date(2026, 3, 16), Decimal("2.79")), ] for obs_date, price in dates_prices: record_price_from_item( e2e_session, product_id=product.id, store_id=store.id, observed_date=obs_date, regular_price=price, ) trend = get_price_trend(e2e_session, product.id, store.id) assert len(trend) == 3 # Newest first assert trend[0].regular_price == Decimal("2.79") assert trend[1].regular_price == Decimal("3.19") assert trend[2].regular_price == Decimal("2.99") class TestShrinkflationE2E: """Shrinkflation detection integrated with product matching.""" def test_shrinkflation_detected_from_receipt_data( self, e2e_session: Session, user: User, store: Store ): """Cheerios went from 12 oz → 10.8 oz between receipts. Detect shrinkflation.""" # Ingest first receipt — creates Cheerios product with size from name first = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) matcher = ProductMatcher(e2e_session, auto_create=True) first_outcomes = matcher.match_items(first.items) # Find the Cheerios product (index 3 in fixture) cheerios_product = None for outcome in first_outcomes: if outcome.match and outcome.match.product: p = outcome.match.product else: # Check auto-created products products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if "cheerios" in p.canonical_name.lower(): cheerios_product = p break if cheerios_product: break else: products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if "cheerios" in p.canonical_name.lower(): cheerios_product = p break assert cheerios_product is not None # The auto-created product should have extracted "12" and "oz" from name assert cheerios_product.size == "12" assert cheerios_product.size_unit == SizeUnit.OZ # Now detect shrinkflation: 12 oz → 10.8 oz event = detect_shrinkflation( e2e_session, product=cheerios_product, new_size="10.8", new_unit=SizeUnit.OZ, new_price=Decimal("4.49"), detected_date=date(2026, 3, 18), ) assert event is not None assert isinstance(event, ShrinkflationEvent) assert event.old_size == "12" assert event.new_size == "10.8" assert event.old_unit == SizeUnit.OZ assert event.new_unit == SizeUnit.OZ assert event.confidence >= Decimal("0.85") # 10% decrease → 0.95 # Verify stored in DB stored = e2e_session.execute( select(ShrinkflationEvent).where( ShrinkflationEvent.normalized_product_id == cheerios_product.id ) ).scalar_one() assert stored.id == event.id def test_shrinkflation_dedup_on_repeat_detection( self, e2e_session: Session, user: User, store: Store ): """Same shrinkflation detected twice returns the existing event, not a duplicate.""" product = NormalizedProduct( id=uuid.uuid4(), canonical_name="Brand X Cereal 15 oz", size="15", size_unit=SizeUnit.OZ, upc_variants=["999888777"], ) e2e_session.add(product) e2e_session.flush() first = detect_shrinkflation(e2e_session, product, new_size="13.5", new_unit=SizeUnit.OZ) second = detect_shrinkflation(e2e_session, product, new_size="13.5", new_unit=SizeUnit.OZ) assert first is not None assert second is not None assert first.id == second.id # same event, not duplicated count = len( e2e_session.execute( select(ShrinkflationEvent).where( ShrinkflationEvent.normalized_product_id == product.id ) ) .scalars() .all() ) assert count == 1 # =========================================================================== # Test class: Event bus pub/sub for pipeline stage transitions # =========================================================================== class TestEventBusE2E: """Redis event publishing at each pipeline stage.""" def test_receipt_ingested_event(self, redis_mock, user: User, store: Store): """publish_event sends a valid EventEnvelope for RECEIPTS_INGESTED.""" purchase_schema = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) subscribers = publish_event( redis_mock, EventType.RECEIPTS_INGESTED, service="receiptwitness", payload={ "receipt_id": purchase_schema.receipt_id, "user_id": str(user.id), "store_slug": StoreSlug.MEIJER, "item_count": len(purchase_schema.items), "total": str(purchase_schema.total), }, ) assert subscribers == 1 assert len(redis_mock._published) == 1 channel, raw_msg = redis_mock._published[0] assert channel == EventType.RECEIPTS_INGESTED.value # Deserialize and validate the envelope envelope = EventEnvelope.model_validate_json(raw_msg) assert envelope.event_type == EventType.RECEIPTS_INGESTED assert envelope.service == "receiptwitness" assert envelope.payload["receipt_id"] == "MJ-2026-03-15-00042" assert envelope.payload["item_count"] == 5 def test_price_updated_event(self, redis_mock, user: User, store: Store): """publish_event sends a valid envelope for PRICES_UPDATED.""" subscribers = publish_event( redis_mock, EventType.PRICES_UPDATED, service="cartsnitch-common", payload={ "product_id": str(uuid.uuid4()), "store_slug": StoreSlug.MEIJER, "old_price": "1.89", "new_price": "1.99", "change_percent": "5.29", }, ) assert subscribers == 1 channel, raw_msg = redis_mock._published[0] assert channel == EventType.PRICES_UPDATED.value envelope = EventEnvelope.model_validate_json(raw_msg) assert envelope.event_type == EventType.PRICES_UPDATED assert envelope.payload["old_price"] == "1.89" def test_products_normalized_event(self, redis_mock, user: User, store: Store): """publish_event sends a valid envelope for PRODUCTS_NORMALIZED.""" product_id = str(uuid.uuid4()) subscribers = publish_event( redis_mock, EventType.PRODUCTS_NORMALIZED, service="cartsnitch-common", payload={ "product_id": product_id, "canonical_name": "Barilla Spaghetti", "match_method": "upc", "confidence": "high", }, ) assert subscribers == 1 channel, raw_msg = redis_mock._published[0] assert channel == EventType.PRODUCTS_NORMALIZED.value envelope = EventEnvelope.model_validate_json(raw_msg) assert envelope.payload["confidence"] == "high" def test_shrinkflation_alert_event(self, redis_mock, user: User, store: Store): """publish_event sends a valid envelope for ALERT_SHRINKFLATION.""" subscribers = publish_event( redis_mock, EventType.ALERT_SHRINKFLATION, service="shrinkray", payload={ "product_id": str(uuid.uuid4()), "product_name": "Cheerios Original", "old_size": "12 oz", "new_size": "10.8 oz", "confidence": "0.95", }, ) assert subscribers == 1 channel, raw_msg = redis_mock._published[0] assert channel == EventType.ALERT_SHRINKFLATION.value def test_full_pipeline_emits_events_at_each_stage( self, e2e_session: Session, redis_mock, user: User, store: Store ): """Full pipeline: ingest → match → record price → publish events at each stage.""" # Stage 1: Normalize receipt purchase_schema = normalize_receipt( MEIJER_RECEIPT_FIXTURE, user_id=str(user.id), store_id=str(store.id), ) # Publish receipt ingested publish_event( redis_mock, EventType.RECEIPTS_INGESTED, service="receiptwitness", payload={ "receipt_id": purchase_schema.receipt_id, "item_count": len(purchase_schema.items), }, ) # Stage 2: Match products matcher = ProductMatcher(e2e_session, auto_create=True) outcomes = matcher.match_items(purchase_schema.items) for i, outcome in enumerate(outcomes): product = outcome.match.product if outcome.match else None if product is None: # Auto-created — look up by name products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if p.canonical_name == purchase_schema.items[i].product_name_raw: product = p break if product is None: continue publish_event( redis_mock, EventType.PRODUCTS_NORMALIZED, service="cartsnitch-common", payload={ "product_id": str(product.id), "match_method": outcome.match.method.value if outcome.match else "auto_create", "confidence": outcome.confidence_level.value, }, ) # Stage 3: Record prices for i, item_schema in enumerate(purchase_schema.items): product = outcomes[i].match.product if outcomes[i].match else None if product is None: products = e2e_session.execute(select(NormalizedProduct)).scalars().all() for p in products: if p.canonical_name == item_schema.product_name_raw: product = p break if product: _, delta = record_price_from_item( e2e_session, product_id=product.id, store_id=store.id, observed_date=purchase_schema.purchase_date, regular_price=item_schema.regular_price or item_schema.unit_price, ) if delta and delta.is_increase: publish_event( redis_mock, EventType.ALERT_PRICE_INCREASE, service="stickershock", payload={ "product_id": str(product.id), "old_price": str(delta.old_price), "new_price": str(delta.new_price), }, ) # Verify events published at each stage channels = [ch for ch, _ in redis_mock._published] assert EventType.RECEIPTS_INGESTED.value in channels assert EventType.PRODUCTS_NORMALIZED.value in channels # No price increases on first receipt, so no ALERT_PRICE_INCREASE expected # All messages are valid EventEnvelopes for _, raw_msg in redis_mock._published: envelope = EventEnvelope.model_validate_json(raw_msg) assert envelope.timestamp is not None assert envelope.service # =========================================================================== # Test class: Error handling for malformed scraper output # =========================================================================== class TestMalformedScraperOutput: """Error handling for bad, partial, or unexpected scraper data.""" def test_missing_item_name_produces_empty_string(self): """Item with no description/name field normalizes with empty product_name_raw.""" item = parse_meijer_item({"unitPrice": "2.99"}) assert item.product_name_raw == "" assert item.unit_price == Decimal("2.99") def test_missing_price_defaults_to_zero(self): """Item with no price fields defaults to zero.""" item = parse_meijer_item({"description": "Mystery Product"}) assert item.unit_price == Decimal("0") assert item.extended_price == Decimal("0") def test_non_numeric_price_defaults_to_zero(self): """Non-numeric price strings safely default to zero.""" item = parse_meijer_item( { "description": "Bad Price Item", "unitPrice": "not_a_number", "extendedPrice": "$$$.xx", } ) assert item.unit_price == Decimal("0") assert item.extended_price == Decimal("0") def test_empty_receipt_produces_empty_items(self, user: User, store: Store): """Receipt with no items normalizes cleanly.""" raw = {"receiptId": "EMPTY-001", "date": "2026-03-15", "total": "0.00"} purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id)) assert purchase.receipt_id == "EMPTY-001" assert purchase.total == Decimal("0.00") assert len(purchase.items) == 0 def test_receipt_missing_date_defaults_to_today(self, user: User, store: Store): """Receipt with no date field defaults to today.""" raw = {"receiptId": "NO-DATE-001", "total": "5.00", "items": []} purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id)) assert purchase.purchase_date == date.today() def test_receipt_missing_id_generates_uuid(self, user: User, store: Store): """Receipt with no ID generates a UUID.""" raw = {"date": "2026-03-15", "total": "10.00", "items": []} purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id)) # Should be a valid UUID string uuid.UUID(purchase.receipt_id) def test_item_with_garbage_upc_preserves_it(self): """UPC field with non-standard content is preserved as-is after strip.""" item = parse_meijer_item( { "description": "Weird UPC Product", "upc": " ABC-NOT-A-UPC ", "unitPrice": "1.99", } ) # lstrip("0") on "ABC-NOT-A-UPC" leaves it intact assert item.upc == "ABC-NOT-A-UPC" def test_negative_prices_pass_through(self): """Negative prices (refunds) are preserved, not zeroed.""" item = parse_meijer_item( { "description": "Refund Item", "unitPrice": "-5.99", "extendedPrice": "-5.99", } ) assert item.unit_price == Decimal("-5.99") assert item.extended_price == Decimal("-5.99") def test_extended_price_auto_calculated(self): """When extendedPrice is missing, it's calculated from unitPrice * quantity.""" item = parse_meijer_item( { "description": "No Extended", "unitPrice": "2.50", "quantity": "3", } ) assert item.extended_price == Decimal("7.50") def test_matching_with_malformed_items(self, e2e_session: Session): """ProductMatcher handles items with missing/empty names gracefully.""" matcher = ProductMatcher(e2e_session, auto_create=True) bad_items = [ parse_meijer_item({"description": "", "unitPrice": "1.00"}), parse_meijer_item({"unitPrice": "2.00"}), ] outcomes = matcher.match_items(bad_items) assert len(outcomes) == 2 # Both should auto-create (no match possible for empty names) assert all(o.created_new for o in outcomes) def test_completely_empty_receipt(self, user: User, store: Store): """Totally empty dict produces a valid PurchaseCreate with defaults.""" purchase = normalize_receipt({}, user_id=str(user.id), store_id=str(store.id)) assert purchase.total == Decimal("0") assert len(purchase.items) == 0 assert purchase.purchase_date == date.today() def test_mixed_valid_and_malformed_items(self, user: User, store: Store): """Receipt with a mix of good and bad items processes all of them.""" raw = { "receiptId": "MIX-001", "date": "2026-03-15", "total": "10.00", "items": [ { "description": "Good Product 8 oz", "upc": "1234567890", "unitPrice": "3.99", "extendedPrice": "3.99", }, { "unitPrice": "not_a_price", }, { "description": " *** Special Chars !!! ", "unitPrice": "2.50", }, ], } purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id)) assert len(purchase.items) == 3 # Good item assert purchase.items[0].product_name_raw == "Good Product 8 oz" assert purchase.items[0].upc == "1234567890" # Bad price item assert purchase.items[1].unit_price == Decimal("0") # Special chars stripped assert purchase.items[2].product_name_raw == "Special Chars"