forked from cartsnitch/cartsnitch
Merge commit '4cf6f91e954b770198578bcb8db5d98ac964bfed' as 'common'
This commit is contained in:
@@ -0,0 +1,24 @@
|
||||
"""Shared test fixtures for cartsnitch-common tests."""
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
from cartsnitch_common.models.base import Base
|
||||
|
||||
|
||||
@pytest.fixture
def engine():
    """Yield an in-memory SQLite engine with every ``Base`` table created.

    The engine is disposed after the requesting test has finished.
    """
    sqlite_engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(bind=sqlite_engine)
    yield sqlite_engine
    sqlite_engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture
def session(engine):
    """Yield a SQLAlchemy session bound to the in-memory ``engine`` fixture.

    The session is used as a context manager, so it is closed once the
    requesting test is done with it.
    """
    with sessionmaker(bind=engine)() as db_session:
        yield db_session
|
||||
@@ -0,0 +1,376 @@
|
||||
"""Tests for SQLAlchemy ORM models."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import inspect
|
||||
|
||||
from cartsnitch_common.constants import (
|
||||
AccountStatus,
|
||||
DiscountType,
|
||||
PriceSource,
|
||||
ProductCategory,
|
||||
SizeUnit,
|
||||
StoreSlug,
|
||||
)
|
||||
from cartsnitch_common.models import (
|
||||
Coupon,
|
||||
NormalizedProduct,
|
||||
PriceHistory,
|
||||
Purchase,
|
||||
PurchaseItem,
|
||||
ShrinkflationEvent,
|
||||
Store,
|
||||
StoreLocation,
|
||||
User,
|
||||
UserStoreAccount,
|
||||
)
|
||||
|
||||
|
||||
class TestTableCreation:
    """Check that creating the metadata produces the expected tables."""

    def test_all_tables_exist(self, engine):
        """Every expected table name is reported by the inspector."""
        required = {
            "stores",
            "store_locations",
            "users",
            "user_store_accounts",
            "purchases",
            "purchase_items",
            "normalized_products",
            "price_history",
            "coupons",
            "shrinkflation_events",
        }
        present = set(inspect(engine).get_table_names())
        # Subset check: extra tables are tolerated here, missing ones are not.
        assert required <= present

    def test_ten_tables_total(self, engine):
        """The freshly created schema contains exactly ten tables."""
        names = inspect(engine).get_table_names()
        assert len(names) == 10
|
||||
|
||||
|
||||
class TestUUIDPrimaryKeys:
    """Committed rows expose their primary key as a ``uuid.UUID``."""

    def test_store_uuid_pk(self, session):
        """A committed Store keeps a UUID-typed ``id``."""
        meijer = Store(
            id=uuid.uuid4(),
            name="Meijer",
            slug=StoreSlug.MEIJER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(meijer)
        session.commit()

        primary_key = meijer.id
        assert isinstance(primary_key, uuid.UUID)

    def test_user_uuid_pk(self, session):
        """A committed User keeps a UUID-typed ``id``."""
        account_owner = User(
            id=uuid.uuid4(),
            email="test@example.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(account_owner)
        session.commit()

        primary_key = account_owner.id
        assert isinstance(primary_key, uuid.UUID)
|
||||
|
||||
|
||||
class TestStoreModel:
    """Behaviour of the ``Store`` model: slug enum storage and slug uniqueness."""

    def test_store_slug_enum(self, session):
        """The slug assigned at construction is still the enum member after commit."""
        store = Store(
            id=uuid.uuid4(),
            name="Kroger",
            slug=StoreSlug.KROGER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(store)
        session.commit()
        assert store.slug == StoreSlug.KROGER

    def test_store_unique_slug(self, session):
        """Committing a second store with an already-used slug is rejected."""
        # Imported locally so this test carries its own dependency.
        from sqlalchemy.exc import IntegrityError

        s1 = Store(
            id=uuid.uuid4(),
            name="Target",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        s2 = Store(
            id=uuid.uuid4(),
            name="Target Duplicate",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(s1)
        session.commit()

        session.add(s2)
        # Expect the constraint violation specifically, so an unrelated failure
        # (typo, missing column, ...) cannot make this test pass by accident.
        with pytest.raises(IntegrityError):
            session.commit()
        # The failed flush leaves the session in a rolled-back-pending state;
        # reset it outside the ``raises`` block so this line actually runs.
        session.rollback()
|
||||
|
||||
|
||||
class TestStoreLocationModel:
    """Persistence of a ``StoreLocation`` attached to a ``Store``."""

    def test_store_location_fields(self, session):
        """Address and coordinate fields are readable after commit."""
        parent_store = Store(
            id=uuid.uuid4(),
            name="Meijer",
            slug=StoreSlug.MEIJER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(parent_store)
        # Flush so the parent row exists before the location references it.
        session.flush()

        location = StoreLocation(
            id=uuid.uuid4(),
            store_id=parent_store.id,
            address="123 Main St",
            city="Ann Arbor",
            state="MI",
            zip="48104",
            lat=42.2808,
            lng=-83.7430,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(location)
        session.commit()

        assert location.city == "Ann Arbor"
        # Latitude is a float; compare with a tolerance.
        assert location.lat == pytest.approx(42.2808)
|
||||
|
||||
|
||||
class TestUserStoreAccountModel:
    """Behaviour of ``UserStoreAccount``: status enum and per-user/store uniqueness."""

    def test_account_status_enum(self, session):
        """The status assigned at construction is still the enum member after commit."""
        user = User(
            id=uuid.uuid4(),
            email="test@test.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        store = Store(
            id=uuid.uuid4(),
            name="Kroger",
            slug=StoreSlug.KROGER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([user, store])
        # Flush so both parent rows exist before the account references them.
        session.flush()

        acct = UserStoreAccount(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            status=AccountStatus.ACTIVE,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(acct)
        session.commit()
        assert acct.status == AccountStatus.ACTIVE

    def test_unique_user_store_constraint(self, session):
        """One account per user per store."""
        # Imported locally so this test carries its own dependency.
        from sqlalchemy.exc import IntegrityError

        user = User(
            id=uuid.uuid4(),
            email="unique@test.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        store = Store(
            id=uuid.uuid4(),
            name="Target",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([user, store])
        session.flush()

        a1 = UserStoreAccount(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            status=AccountStatus.ACTIVE,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        # Same user/store pair as ``a1``; only the status differs.
        a2 = UserStoreAccount(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            status=AccountStatus.EXPIRED,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(a1)
        session.commit()

        session.add(a2)
        # Expect the constraint violation specifically rather than any error.
        with pytest.raises(IntegrityError):
            session.commit()
        # Reset the session after the failed flush; kept outside the ``raises``
        # block so it is not skipped when ``commit`` raises.
        session.rollback()
|
||||
|
||||
|
||||
class TestPurchaseModel:
    """Persistence of a ``Purchase`` together with one ``PurchaseItem``."""

    def test_purchase_with_items(self, session):
        """A purchase and a line item linked to it commit and read back."""
        buyer = User(
            id=uuid.uuid4(),
            email="buyer@test.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        retailer = Store(
            id=uuid.uuid4(),
            name="Meijer",
            slug=StoreSlug.MEIJER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([buyer, retailer])
        # Parents must exist before the purchase references them.
        session.flush()

        receipt = Purchase(
            id=uuid.uuid4(),
            user_id=buyer.id,
            store_id=retailer.id,
            receipt_id="RCP-001",
            purchase_date=date(2026, 3, 15),
            total=Decimal("42.50"),
            ingested_at=datetime.now(UTC),
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(receipt)
        session.flush()

        line_item = PurchaseItem(
            id=uuid.uuid4(),
            purchase_id=receipt.id,
            product_name_raw="Meijer Whole Milk 1 Gallon",
            upc="0041250000001",
            quantity=Decimal("1"),
            unit_price=Decimal("3.49"),
            extended_price=Decimal("3.49"),
        )
        session.add(line_item)
        session.commit()

        assert line_item.product_name_raw == "Meijer Whole Milk 1 Gallon"
        assert line_item.unit_price == Decimal("3.49")
|
||||
|
||||
|
||||
class TestNormalizedProductModel:
    """Persistence of a ``NormalizedProduct`` carrying several UPC variants."""

    def test_product_with_upc_variants(self, session):
        """Category and size-unit enums are intact after commit."""
        milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk, 1 Gallon",
            category=ProductCategory.DAIRY,
            brand="Store Brand",
            size="128",
            size_unit=SizeUnit.FL_OZ,
            upc_variants=["0041250000001", "0041250000002"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(milk)
        session.commit()

        assert milk.category == ProductCategory.DAIRY
        assert milk.size_unit == SizeUnit.FL_OZ
|
||||
|
||||
|
||||
class TestPriceHistoryModel:
    """Persistence of a ``PriceHistory`` row for a store/product pair."""

    def test_price_source_enum(self, session):
        """Source enum and regular price are intact after commit."""
        retailer = Store(
            id=uuid.uuid4(),
            name="Kroger",
            slug=StoreSlug.KROGER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        eggs = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Eggs, Large, 12ct",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([retailer, eggs])
        # Both parents must exist before the observation references them.
        session.flush()

        observation = PriceHistory(
            id=uuid.uuid4(),
            normalized_product_id=eggs.id,
            store_id=retailer.id,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("4.99"),
            sale_price=Decimal("3.99"),
            source=PriceSource.RECEIPT,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(observation)
        session.commit()

        assert observation.source == PriceSource.RECEIPT
        assert observation.regular_price == Decimal("4.99")
|
||||
|
||||
|
||||
class TestCouponModel:
    """Persistence of a ``Coupon`` attached to a ``Store``."""

    def test_coupon_discount_types(self, session):
        """Discount type enum and discount value are intact after commit."""
        retailer = Store(
            id=uuid.uuid4(),
            name="Target",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(retailer)
        session.flush()

        egg_coupon = Coupon(
            id=uuid.uuid4(),
            store_id=retailer.id,
            title="$2 off eggs",
            discount_type=DiscountType.FIXED,
            discount_value=Decimal("2.00"),
            requires_clip=True,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(egg_coupon)
        session.commit()

        assert egg_coupon.discount_type == DiscountType.FIXED
        assert egg_coupon.discount_value == Decimal("2.00")
|
||||
|
||||
|
||||
class TestShrinkflationEventModel:
    """Persistence of a ``ShrinkflationEvent`` for a normalized product."""

    def test_shrinkflation_event(self, session):
        """Confidence and unit enum are intact after commit."""
        cereal = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Cereal, Honey Oats",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(cereal)
        # The product row must exist before the event references it.
        session.flush()

        shrink = ShrinkflationEvent(
            id=uuid.uuid4(),
            normalized_product_id=cereal.id,
            detected_date=date(2026, 3, 10),
            old_size="18",
            new_size="15.4",
            old_unit=SizeUnit.OZ,
            new_unit=SizeUnit.OZ,
            price_at_old_size=Decimal("4.99"),
            price_at_new_size=Decimal("4.99"),
            confidence=Decimal("0.95"),
            notes="Size reduced by 14.4%, price unchanged",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(shrink)
        session.commit()

        assert shrink.confidence == Decimal("0.95")
        assert shrink.old_unit == SizeUnit.OZ
|
||||
@@ -0,0 +1,157 @@
|
||||
"""Tests for product normalization module."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from cartsnitch_common.normalization import (
|
||||
MatchMethod,
|
||||
clean_name,
|
||||
extract_size_info,
|
||||
jaccard_similarity,
|
||||
match_by_name,
|
||||
match_by_upc,
|
||||
normalize_product,
|
||||
)
|
||||
|
||||
|
||||
class TestCleanName:
    """Unit tests for ``clean_name``."""

    def test_lowercase(self):
        """Mixed-case input comes back fully lowercased."""
        assert clean_name("Kroger WHOLE MILK") == "kroger whole milk"

    def test_removes_size_info(self):
        """A size token such as ``16 oz`` does not survive cleaning."""
        assert "oz" not in clean_name("Milk 16 oz Whole")

    def test_removes_noise_words(self):
        """Noise words are dropped as whole tokens."""
        cleaned = clean_name("The Original Brand Milk")
        # Compare against tokens so a substring of another word cannot match.
        tokens = cleaned.split()
        assert "the" not in tokens
        assert "original" not in tokens
        assert "brand" not in tokens

    def test_collapses_whitespace(self):
        """Runs of spaces are collapsed; no double space remains."""
        # The input must contain runs of spaces and the needle must be a double
        # space. A single-space needle would contradict test_lowercase, which
        # expects single spaces between words to be kept.
        assert "  " not in clean_name("Milk   Whole   Gallon")

    def test_removes_punctuation(self):
        """Apostrophes and parentheses are stripped."""
        cleaned = clean_name("Meijer's Best (Organic) Milk!")
        assert "'" not in cleaned
        assert "(" not in cleaned
|
||||
|
||||
|
||||
class TestExtractSizeInfo:
    """Unit tests for ``extract_size_info``: returns ``(amount, unit)`` or ``None``."""

    def test_extracts_oz(self):
        """Ounces are reported as ``oz``."""
        assert extract_size_info("Cereal 18 oz box") == ("18", "oz")

    def test_extracts_fl_oz(self):
        """Fluid ounces are reported as ``fl_oz``."""
        assert extract_size_info("Juice 64 fl oz") == ("64", "fl_oz")

    def test_extracts_lb(self):
        """A decimal pound amount keeps its fractional part."""
        assert extract_size_info("Ground Beef 1.5 lb") == ("1.5", "lb")

    def test_extracts_ct(self):
        """Counts are reported as ``ct``."""
        assert extract_size_info("Eggs Large 12 ct") == ("12", "ct")

    def test_no_size_returns_none(self):
        """A name with no size token yields ``None``."""
        size = extract_size_info("Bananas")
        assert size is None
|
||||
|
||||
|
||||
class TestJaccardSimilarity:
    """Unit tests for ``jaccard_similarity``."""

    def test_identical_strings(self):
        """Identical inputs score 1.0."""
        text = "whole milk gallon"
        assert jaccard_similarity(text, "whole milk gallon") == 1.0

    def test_completely_different(self):
        """Inputs sharing no words score 0.0."""
        similarity = jaccard_similarity("apple juice", "ground beef")
        assert similarity == 0.0

    def test_partial_overlap(self):
        """Two of three words shared lands strictly between 0.4 and 0.8."""
        similarity = jaccard_similarity("kroger whole milk", "meijer whole milk")
        assert 0.4 < similarity < 0.8  # "whole" and "milk" overlap

    def test_empty_strings(self):
        """An empty operand on either side scores 0.0."""
        both_empty = jaccard_similarity("", "")
        one_empty = jaccard_similarity("milk", "")
        assert both_empty == 0.0
        assert one_empty == 0.0
|
||||
|
||||
|
||||
class TestMatchByUPC:
    """Tests for ``match_by_upc`` against the in-memory SQLite session."""

    def test_match_found(self, session):
        """A UPC listed in ``upc_variants`` resolves with full confidence."""
        milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk, Gallon",
            upc_variants=["0041250000001", "0041250000002"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(milk)
        session.commit()

        # NOTE(review): an earlier comment here said SQLite lacks JSONB
        # containment and that this call "will raise", yet the assertions below
        # require a successful match. Presumably match_by_upc has a
        # SQLite-compatible path — confirm against its implementation.
        outcome = match_by_upc(session, "0041250000001")

        assert outcome is not None
        assert outcome.method == MatchMethod.UPC
        assert outcome.confidence == 1.0

    def test_no_match(self, session):
        """A UPC that no product lists yields ``None``."""
        outcome = match_by_upc(session, "9999999999999")
        assert outcome is None
|
||||
|
||||
|
||||
class TestMatchByName:
    """Tests for ``match_by_name`` against the in-memory SQLite session."""

    def test_exact_name_match(self, session):
        """A name differing only in punctuation matches via the NAME method."""
        milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk, Gallon",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(milk)
        session.commit()

        outcome = match_by_name(session, "Whole Milk Gallon")

        assert outcome is not None
        assert outcome.method == MatchMethod.NAME
        assert outcome.confidence > 0.5

    def test_fuzzy_match(self, session):
        """With a low threshold, a name from another store brand still matches."""
        kroger_milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Kroger Whole Milk, 1 Gallon",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(kroger_milk)
        session.commit()

        outcome = match_by_name(session, "Meijer Whole Milk 1 Gallon", threshold=0.3)

        assert outcome is not None
        assert outcome.confidence > 0.3

    def test_no_match_below_threshold(self, session):
        """An unrelated product name does not match at threshold 0.5."""
        beef = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Ground Beef 80/20",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(beef)
        session.commit()

        outcome = match_by_name(session, "Apple Juice 64 oz", threshold=0.5)

        assert outcome is None
|
||||
|
||||
|
||||
class TestNormalizeProduct:
    """Tests for the ``normalize_product`` entry point."""

    def test_name_fallback(self, session):
        """With ``upc=None`` the match is reported with the NAME method."""
        eggs = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Large Eggs, 12 count",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(eggs)
        session.commit()

        outcome = normalize_product(session, "Large Eggs 12 ct", upc=None)

        assert outcome is not None
        assert outcome.method == MatchMethod.NAME

    def test_no_match(self, session):
        """With no products stored, nothing matches and ``None`` is returned."""
        outcome = normalize_product(session, "Nonexistent Product XYZ", upc=None)
        assert outcome is None
|
||||
@@ -0,0 +1,949 @@
|
||||
"""End-to-end integration tests for the data pipeline.
|
||||
|
||||
Tests the full flow: scraper output → normalization → product matching → DB storage
|
||||
→ price tracking → shrinkflation detection → event publishing.
|
||||
|
||||
Uses real test fixtures with an in-memory SQLite database, not mocks.
|
||||
"""
|
||||
|
||||
import uuid
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine, select
|
||||
from sqlalchemy.orm import Session, sessionmaker
|
||||
|
||||
from cartsnitch_common.constants import (
|
||||
EventType,
|
||||
SizeUnit,
|
||||
StoreSlug,
|
||||
)
|
||||
from cartsnitch_common.events import publish_event
|
||||
from cartsnitch_common.models import (
|
||||
Base,
|
||||
NormalizedProduct,
|
||||
PriceHistory,
|
||||
Purchase,
|
||||
PurchaseItem,
|
||||
ShrinkflationEvent,
|
||||
Store,
|
||||
User,
|
||||
)
|
||||
from cartsnitch_common.pipeline.matching import ProductMatcher
|
||||
from cartsnitch_common.pipeline.price_tracking import (
|
||||
PriceDelta,
|
||||
get_price_trend,
|
||||
record_price_from_item,
|
||||
)
|
||||
from cartsnitch_common.pipeline.receipt import normalize_receipt, parse_meijer_item
|
||||
from cartsnitch_common.pipeline.shrinkflation import detect_shrinkflation
|
||||
from cartsnitch_common.schemas.events import EventEnvelope
|
||||
from cartsnitch_common.schemas.purchase import PurchaseCreate
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures: realistic scraper output from Meijer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# First-visit receipt payload. The items deliberately mix field-name variants
# (description/name, upcCode/upc, quantity/qty, unitPrice/price,
# extendedPrice/totalPrice, category/department), and the last item has no UPC.
MEIJER_RECEIPT_FIXTURE = {
    "receiptId": "MJ-2026-03-15-00042",
    "date": "2026-03-15",
    "total": "47.82",
    "subtotal": "44.50",
    "taxAmount": "3.32",
    "totalSavings": "6.20",
    "items": [
        # Milk: description padded with whitespace, UPC with leading zeros.
        {
            "description": " Meijer Whole Milk 1 Gallon ",
            "upcCode": "00041250010001",
            "quantity": 1,
            "unitPrice": "3.29",
            "extendedPrice": "3.29",
            "regularPrice": "3.49",
            "salePrice": "3.29",
            "category": "Dairy",
        },
        # Pasta: alternate field names plus a coupon discount.
        {
            "name": "BARILLA SPAGHETTI 16 OZ",
            "upc": "076808280753",
            "qty": 2,
            "price": "1.69",
            "totalPrice": "3.38",
            "regularPrice": "1.89",
            "couponDiscount": "0.40",
            "department": "Pantry",
        },
        # Ground beef: carries a loyalty discount.
        {
            "description": "Meijer Lean Ground Beef 1 lb",
            "upcCode": "00041250022004",
            "quantity": 1,
            "unitPrice": "5.99",
            "extendedPrice": "5.99",
            "regularPrice": "6.49",
            "loyaltyDiscount": "0.50",
            "category": "Meat",
        },
        # Cheerios: 12 oz on this visit.
        {
            "description": "Cheerios Original 12 oz",
            "upcCode": "016000275645",
            "quantity": 1,
            "unitPrice": "4.49",
            "extendedPrice": "4.49",
            "regularPrice": "4.49",
            "category": "Snacks",
        },
        # Bananas: no UPC and no regular price.
        {
            "description": "Fresh Bananas",
            "quantity": 1,
            "unitPrice": "0.69",
            "extendedPrice": "0.69",
            "category": "Produce",
        },
    ],
}
|
||||
|
||||
# Second-visit receipt payload. It repeats three UPCs from the first visit:
# milk at its regular price, pasta at a higher price, and Cheerios with a
# smaller size in the description at the same price.
MEIJER_RECEIPT_SECOND_VISIT = {
    "receiptId": "MJ-2026-03-18-00099",
    "date": "2026-03-18",
    "total": "12.47",
    "items": [
        # Milk: same UPC, no sale price this time.
        {
            "description": "Meijer Whole Milk 1 Gallon",
            "upcCode": "00041250010001",
            "quantity": 1,
            "unitPrice": "3.49",
            "extendedPrice": "3.49",
            "regularPrice": "3.49",
            "category": "Dairy",
        },
        # Pasta: same UPC, regular price now 1.99.
        {
            "description": "BARILLA SPAGHETTI 16 OZ",
            "upc": "076808280753",
            "qty": 1,
            "price": "1.99",
            "totalPrice": "1.99",
            "regularPrice": "1.99",
            "department": "Pantry",
        },
        # Cheerios: same UPC and price, description now says 10.8 oz.
        {
            "description": "Cheerios Original 10.8 oz",
            "upcCode": "016000275645",
            "quantity": 1,
            "unitPrice": "4.49",
            "extendedPrice": "4.49",
            "regularPrice": "4.49",
            "category": "Snacks",
        },
    ],
}
|
||||
|
||||
|
||||
@pytest.fixture
def e2e_engine():
    """Yield an in-memory SQLite engine with all ``Base`` tables for E2E tests.

    The engine is disposed after the requesting test has finished.
    """
    sqlite_engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(bind=sqlite_engine)
    yield sqlite_engine
    sqlite_engine.dispose()
|
||||
|
||||
|
||||
@pytest.fixture
def e2e_session(e2e_engine):
    """Yield a SQLAlchemy session bound to the E2E in-memory engine.

    This fixture only opens the session and inserts no rows itself; it is
    closed when the context manager exits after the test.
    """
    with sessionmaker(bind=e2e_engine)() as db_session:
        yield db_session
|
||||
|
||||
|
||||
@pytest.fixture
def store(e2e_session: Session) -> Store:
    """Add a Meijer ``Store`` to the E2E session, flush it, and return it."""
    meijer = Store(
        id=uuid.uuid4(),
        name="Meijer",
        slug=StoreSlug.MEIJER,
    )
    e2e_session.add(meijer)
    # Flush (not commit) so the row is sent to the database within the
    # session's current transaction.
    e2e_session.flush()
    return meijer
|
||||
|
||||
|
||||
@pytest.fixture
def user(e2e_session: Session) -> User:
    """Add a test ``User`` to the E2E session, flush it, and return it."""
    tester = User(
        id=uuid.uuid4(),
        email="tester@cartsnitch.com",
        hashed_password="hashed_test_password",
        display_name="Test User",
    )
    e2e_session.add(tester)
    # Flush (not commit) so the row is sent to the database within the
    # session's current transaction.
    e2e_session.flush()
    return tester
|
||||
|
||||
|
||||
@pytest.fixture
def redis_mock():
    """Return a ``MagicMock`` Redis client that records published messages.

    Each ``publish(channel, message)`` call appends ``(channel, message)`` to a
    list exposed as ``client._published`` and returns ``1``.
    """
    captured: list[tuple[str, str]] = []

    def record_publish(channel: str, message: str) -> int:
        captured.append((channel, message))
        return 1

    fake_redis = MagicMock()
    # Wrap the recorder in a MagicMock so call assertions still work on it.
    fake_redis.publish = MagicMock(side_effect=record_publish)
    fake_redis._published = captured
    return fake_redis
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Test class: Full pipeline E2E — scraper → normalization → matching → storage
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestFullPipelineE2E:
    """Scraper output → normalize_receipt → ProductMatcher → DB storage."""

    def test_normalize_meijer_receipt(self, user: User, store: Store):
        """Raw Meijer receipt normalizes into a valid PurchaseCreate."""
        purchase = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        assert isinstance(purchase, PurchaseCreate)
        assert purchase.receipt_id == "MJ-2026-03-15-00042"
        assert purchase.purchase_date == date(2026, 3, 15)
        assert purchase.total == Decimal("47.82")
        assert purchase.subtotal == Decimal("44.50")
        assert purchase.tax == Decimal("3.32")
        assert purchase.savings_total == Decimal("6.20")
        assert len(purchase.items) == 5
        # The untouched scraper payload is carried along on the schema.
        assert purchase.raw_data == MEIJER_RECEIPT_FIXTURE

    def test_item_field_normalization(self, user: User, store: Store):
        """Items parse correctly regardless of field name variants."""
        purchase = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        # Item using 'description' / 'upcCode' fields
        milk = purchase.items[0]
        assert milk.product_name_raw == "Meijer Whole Milk 1 Gallon"
        assert milk.upc == "41250010001"  # leading zeros stripped
        assert milk.unit_price == Decimal("3.29")

        # Item using 'name' / 'upc' / 'qty' / 'price' / 'totalPrice' fields
        pasta = purchase.items[1]
        assert pasta.product_name_raw == "BARILLA SPAGHETTI 16 OZ"
        assert pasta.upc == "76808280753"
        assert pasta.quantity == Decimal("2")
        assert pasta.extended_price == Decimal("3.38")
        assert pasta.coupon_discount == Decimal("0.40")

    def test_upc_product_matching_and_storage(self, e2e_session: Session, user: User, store: Store):
        """Full flow: normalize → match → store in DB. UPC matching works E2E."""
        purchase_schema = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        # Run product matching
        matcher = ProductMatcher(e2e_session, auto_create=True)
        outcomes = matcher.match_items(purchase_schema.items)

        assert len(outcomes) == 5

        # First item has a UPC — auto_create makes a new product
        assert outcomes[0].created_new is True

        # Store the purchase in DB
        purchase_db = Purchase(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            receipt_id=purchase_schema.receipt_id,
            purchase_date=purchase_schema.purchase_date,
            total=purchase_schema.total,
            subtotal=purchase_schema.subtotal,
            tax=purchase_schema.tax,
            savings_total=purchase_schema.savings_total,
            raw_data=purchase_schema.raw_data,
        )
        e2e_session.add(purchase_db)
        # Flush so the purchase row exists before items reference it.
        e2e_session.flush()

        # Store items linked to the purchase. The loop index was never used,
        # so build all rows in one comprehension and add them together.
        e2e_session.add_all(
            [
                PurchaseItem(
                    id=uuid.uuid4(),
                    purchase_id=purchase_db.id,
                    product_name_raw=item_schema.product_name_raw,
                    upc=item_schema.upc,
                    quantity=item_schema.quantity,
                    unit_price=item_schema.unit_price,
                    extended_price=item_schema.extended_price,
                    regular_price=item_schema.regular_price,
                    sale_price=item_schema.sale_price,
                    coupon_discount=item_schema.coupon_discount,
                    loyalty_discount=item_schema.loyalty_discount,
                    category_raw=item_schema.category_raw,
                )
                for item_schema in purchase_schema.items
            ]
        )
        e2e_session.flush()

        # Verify data persisted correctly
        stored_purchase = e2e_session.execute(
            select(Purchase).where(Purchase.receipt_id == "MJ-2026-03-15-00042")
        ).scalar_one()
        assert stored_purchase.total == Decimal("47.82")
        assert stored_purchase.user_id == user.id
        assert stored_purchase.store_id == store.id

        stored_items = (
            e2e_session.execute(
                select(PurchaseItem).where(PurchaseItem.purchase_id == stored_purchase.id)
            )
            .scalars()
            .all()
        )
        assert len(stored_items) == 5

        # Verify products were created in normalized_products table
        products = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        assert len(products) == 5  # all 5 items auto-created products

    def test_second_visit_reuses_existing_products(
        self, e2e_session: Session, user: User, store: Store
    ):
        """On second receipt, products matched by UPC reuse existing records."""
        # Ingest first receipt
        first = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        matcher.match_items(first.items)

        # Baseline product count after the first receipt.
        products_after_first = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        first_count = len(products_after_first)

        # Ingest second receipt — overlapping UPCs
        second = normalize_receipt(
            MEIJER_RECEIPT_SECOND_VISIT,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        second_outcomes = matcher.match_items(second.items)

        # Milk, pasta, cheerios should match existing by UPC
        assert second_outcomes[0].created_new is False  # milk — UPC match
        assert second_outcomes[1].created_new is False  # pasta — UPC match
        assert second_outcomes[2].created_new is False  # cheerios — UPC match

        products_after_second = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        assert len(products_after_second) == first_count  # no new products created
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Test class: Price tracking and shrinkflation detection E2E
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestPriceTrackingE2E:
    """Price recording from stored items and price delta detection."""

    @staticmethod
    def _resolve_product(session: Session, outcome, item_schema):
        """Return the product for one match outcome, or ``None``.

        Prefers the product attached to ``outcome.match``. When the outcome
        carries no matched product (the auto-create path in these tests), all
        stored ``NormalizedProduct`` rows are scanned for one whose
        ``canonical_name`` equals the item's ``product_name_raw``.
        """
        product = outcome.match.product if outcome.match else None
        if product is not None:
            return product
        stored = session.execute(select(NormalizedProduct)).scalars().all()
        return next(
            (p for p in stored if p.canonical_name == item_schema.product_name_raw),
            None,
        )

    def _record_receipt_prices(self, session: Session, receipt, outcomes, store: Store) -> list:
        """Record one price observation per resolvable item of ``receipt``.

        Items and outcomes are paired positionally; ``strict=True`` makes a
        length mismatch fail loudly instead of silently dropping items.
        Returns the ``(entry, delta)`` pairs produced by
        ``record_price_from_item`` in item order.
        """
        recorded = []
        for item_schema, outcome in zip(receipt.items, outcomes, strict=True):
            product = self._resolve_product(session, outcome, item_schema)
            if not product:
                continue
            entry, delta = record_price_from_item(
                session,
                product_id=product.id,
                store_id=store.id,
                observed_date=receipt.purchase_date,
                # Fall back to the unit price when no regular price was parsed.
                regular_price=item_schema.regular_price or item_schema.unit_price,
                sale_price=item_schema.sale_price,
            )
            recorded.append((entry, delta))
        return recorded

    def test_price_recorded_from_ingested_receipt(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Ingest receipt → match products → record prices → verify price history."""
        purchase_schema = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        outcomes = matcher.match_items(purchase_schema.items)

        # Record prices for each matched item
        price_entries = self._record_receipt_prices(
            e2e_session, purchase_schema, outcomes, store
        )

        # First ingestion — no deltas expected
        assert all(delta is None for _, delta in price_entries)

        # Verify price history stored
        all_prices = e2e_session.execute(select(PriceHistory)).scalars().all()
        assert len(all_prices) >= 4  # at least the items with regular_price

    def test_price_increase_detected_on_second_receipt(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Second receipt with higher price triggers a PriceDelta."""
        # Ingest first receipt and record its prices (deltas are irrelevant here)
        first = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        first_outcomes = matcher.match_items(first.items)
        self._record_receipt_prices(e2e_session, first, first_outcomes, store)

        # Ingest second receipt — pasta price went up ($1.89 → $1.99)
        second = normalize_receipt(
            MEIJER_RECEIPT_SECOND_VISIT,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        second_outcomes = matcher.match_items(second.items)

        # Record second prices and keep only the deltas that were reported
        second_recorded = self._record_receipt_prices(
            e2e_session, second, second_outcomes, store
        )
        deltas: list[PriceDelta] = [delta for _, delta in second_recorded if delta]

        # Milk went from $3.49 → $3.49 (no change); pasta from $1.89 → $1.99 (increase)
        price_increases = [d for d in deltas if d.is_increase]
        assert len(price_increases) >= 1

        pasta_delta = next(
            (d for d in price_increases if d.old_price == Decimal("1.89")),
            None,
        )
        assert pasta_delta is not None
        assert pasta_delta.new_price == Decimal("1.99")
        assert pasta_delta.change_amount == Decimal("0.10")
        assert pasta_delta.is_increase is True

    def test_price_trend_across_visits(self, e2e_session: Session, user: User, store: Store):
        """get_price_trend returns ordered history after multiple ingestions."""
        # Create a product manually
        product = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Test Product",
            upc_variants=["1234567890"],
        )
        e2e_session.add(product)
        e2e_session.flush()

        # Record 3 prices on different dates
        dates_prices = [
            (date(2026, 3, 10), Decimal("2.99")),
            (date(2026, 3, 13), Decimal("3.19")),
            (date(2026, 3, 16), Decimal("2.79")),
        ]
        for obs_date, price in dates_prices:
            record_price_from_item(
                e2e_session,
                product_id=product.id,
                store_id=store.id,
                observed_date=obs_date,
                regular_price=price,
            )

        trend = get_price_trend(e2e_session, product.id, store.id)
        assert len(trend) == 3
        # Newest first: the expected prices are the recorded ones in reverse date order
        expected_newest_first = [Decimal("2.79"), Decimal("3.19"), Decimal("2.99")]
        assert trend[0].regular_price == expected_newest_first[0]
        assert trend[1].regular_price == expected_newest_first[1]
        assert trend[2].regular_price == expected_newest_first[2]
|
||||
|
||||
|
||||
class TestShrinkflationE2E:
    """Shrinkflation detection integrated with product matching."""

    def test_shrinkflation_detected_from_receipt_data(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Cheerios went from 12 oz → 10.8 oz between receipts. Detect shrinkflation."""
        # Ingest first receipt — creates Cheerios product with size from name
        first = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        # Called for its side effect of persisting products; the Cheerios row
        # is then looked up directly from the stored products by name.
        matcher.match_items(first.items)

        # Find the Cheerios product with a single query over stored products
        stored_products = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        cheerios_product = next(
            (p for p in stored_products if "cheerios" in p.canonical_name.lower()),
            None,
        )

        assert cheerios_product is not None
        # The auto-created product should have extracted "12" and "oz" from name
        assert cheerios_product.size == "12"
        assert cheerios_product.size_unit == SizeUnit.OZ

        # Now detect shrinkflation: 12 oz → 10.8 oz
        event = detect_shrinkflation(
            e2e_session,
            product=cheerios_product,
            new_size="10.8",
            new_unit=SizeUnit.OZ,
            new_price=Decimal("4.49"),
            detected_date=date(2026, 3, 18),
        )

        assert event is not None
        assert isinstance(event, ShrinkflationEvent)
        assert event.old_size == "12"
        assert event.new_size == "10.8"
        assert event.old_unit == SizeUnit.OZ
        assert event.new_unit == SizeUnit.OZ
        assert event.confidence >= Decimal("0.85")  # 10% decrease → 0.95

        # Verify stored in DB: exactly one event row for this product
        stored = e2e_session.execute(
            select(ShrinkflationEvent).where(
                ShrinkflationEvent.normalized_product_id == cheerios_product.id
            )
        ).scalar_one()
        assert stored.id == event.id

    def test_shrinkflation_dedup_on_repeat_detection(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Same shrinkflation detected twice returns the existing event, not a duplicate."""
        product = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Brand X Cereal 15 oz",
            size="15",
            size_unit=SizeUnit.OZ,
            upc_variants=["999888777"],
        )
        e2e_session.add(product)
        e2e_session.flush()

        first = detect_shrinkflation(e2e_session, product, new_size="13.5", new_unit=SizeUnit.OZ)
        second = detect_shrinkflation(e2e_session, product, new_size="13.5", new_unit=SizeUnit.OZ)

        assert first is not None
        assert second is not None
        assert first.id == second.id  # same event, not duplicated

        # Only one event row may exist for the product after both detections
        events_for_product = (
            e2e_session.execute(
                select(ShrinkflationEvent).where(
                    ShrinkflationEvent.normalized_product_id == product.id
                )
            )
            .scalars()
            .all()
        )
        count = len(events_for_product)
        assert count == 1
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Test class: Event bus pub/sub for pipeline stage transitions
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestEventBusE2E:
    """Redis event publishing at each pipeline stage."""

    @staticmethod
    def _resolve_product(session: Session, outcome, item_schema):
        """Return the product for one match outcome, or ``None``.

        Uses the product on ``outcome.match`` when present; otherwise scans the
        stored ``NormalizedProduct`` rows for one whose ``canonical_name``
        equals the item's ``product_name_raw``.
        """
        product = outcome.match.product if outcome.match else None
        if product is not None:
            return product
        stored = session.execute(select(NormalizedProduct)).scalars().all()
        return next(
            (p for p in stored if p.canonical_name == item_schema.product_name_raw),
            None,
        )

    def test_receipt_ingested_event(self, redis_mock, user: User, store: Store):
        """publish_event sends a valid EventEnvelope for RECEIPTS_INGESTED."""
        purchase_schema = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        subscribers = publish_event(
            redis_mock,
            EventType.RECEIPTS_INGESTED,
            service="receiptwitness",
            payload={
                "receipt_id": purchase_schema.receipt_id,
                "user_id": str(user.id),
                "store_slug": StoreSlug.MEIJER,
                "item_count": len(purchase_schema.items),
                "total": str(purchase_schema.total),
            },
        )

        assert subscribers == 1
        assert len(redis_mock._published) == 1
        channel, raw_msg = redis_mock._published[0]
        assert channel == EventType.RECEIPTS_INGESTED.value

        # Deserialize and validate the envelope
        envelope = EventEnvelope.model_validate_json(raw_msg)
        assert envelope.event_type == EventType.RECEIPTS_INGESTED
        assert envelope.service == "receiptwitness"
        assert envelope.payload["receipt_id"] == "MJ-2026-03-15-00042"
        assert envelope.payload["item_count"] == 5

    def test_price_updated_event(self, redis_mock, user: User, store: Store):
        """publish_event sends a valid envelope for PRICES_UPDATED."""
        subscribers = publish_event(
            redis_mock,
            EventType.PRICES_UPDATED,
            service="cartsnitch-common",
            payload={
                "product_id": str(uuid.uuid4()),
                "store_slug": StoreSlug.MEIJER,
                "old_price": "1.89",
                "new_price": "1.99",
                "change_percent": "5.29",
            },
        )

        assert subscribers == 1
        channel, raw_msg = redis_mock._published[0]
        assert channel == EventType.PRICES_UPDATED.value

        envelope = EventEnvelope.model_validate_json(raw_msg)
        assert envelope.event_type == EventType.PRICES_UPDATED
        assert envelope.payload["old_price"] == "1.89"

    def test_products_normalized_event(self, redis_mock, user: User, store: Store):
        """publish_event sends a valid envelope for PRODUCTS_NORMALIZED."""
        product_id = str(uuid.uuid4())
        subscribers = publish_event(
            redis_mock,
            EventType.PRODUCTS_NORMALIZED,
            service="cartsnitch-common",
            payload={
                "product_id": product_id,
                "canonical_name": "Barilla Spaghetti",
                "match_method": "upc",
                "confidence": "high",
            },
        )

        assert subscribers == 1
        channel, raw_msg = redis_mock._published[0]
        assert channel == EventType.PRODUCTS_NORMALIZED.value
        envelope = EventEnvelope.model_validate_json(raw_msg)
        assert envelope.payload["confidence"] == "high"

    def test_shrinkflation_alert_event(self, redis_mock, user: User, store: Store):
        """publish_event sends a valid envelope for ALERT_SHRINKFLATION."""
        subscribers = publish_event(
            redis_mock,
            EventType.ALERT_SHRINKFLATION,
            service="shrinkray",
            payload={
                "product_id": str(uuid.uuid4()),
                "product_name": "Cheerios Original",
                "old_size": "12 oz",
                "new_size": "10.8 oz",
                "confidence": "0.95",
            },
        )

        assert subscribers == 1
        channel, raw_msg = redis_mock._published[0]
        assert channel == EventType.ALERT_SHRINKFLATION.value

    def test_full_pipeline_emits_events_at_each_stage(
        self, e2e_session: Session, redis_mock, user: User, store: Store
    ):
        """Full pipeline: ingest → match → record price → publish events at each stage."""
        # Stage 1: Normalize receipt
        purchase_schema = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        # Publish receipt ingested
        publish_event(
            redis_mock,
            EventType.RECEIPTS_INGESTED,
            service="receiptwitness",
            payload={
                "receipt_id": purchase_schema.receipt_id,
                "item_count": len(purchase_schema.items),
            },
        )

        # Stage 2: Match products
        matcher = ProductMatcher(e2e_session, auto_create=True)
        outcomes = matcher.match_items(purchase_schema.items)
        # Pair each item with its outcome once; strict=True fails loudly if the
        # matcher ever returns a different number of outcomes than items.
        paired = list(zip(purchase_schema.items, outcomes, strict=True))

        for item_schema, outcome in paired:
            product = self._resolve_product(e2e_session, outcome, item_schema)
            if product is None:
                continue
            publish_event(
                redis_mock,
                EventType.PRODUCTS_NORMALIZED,
                service="cartsnitch-common",
                payload={
                    "product_id": str(product.id),
                    "match_method": outcome.match.method.value if outcome.match else "auto_create",
                    "confidence": outcome.confidence_level.value,
                },
            )

        # Stage 3: Record prices
        for item_schema, outcome in paired:
            product = self._resolve_product(e2e_session, outcome, item_schema)
            if not product:
                continue
            _, delta = record_price_from_item(
                e2e_session,
                product_id=product.id,
                store_id=store.id,
                observed_date=purchase_schema.purchase_date,
                regular_price=item_schema.regular_price or item_schema.unit_price,
            )
            if delta and delta.is_increase:
                publish_event(
                    redis_mock,
                    EventType.ALERT_PRICE_INCREASE,
                    service="stickershock",
                    payload={
                        "product_id": str(product.id),
                        "old_price": str(delta.old_price),
                        "new_price": str(delta.new_price),
                    },
                )

        # Verify events published at each stage
        channels = [ch for ch, _ in redis_mock._published]
        assert EventType.RECEIPTS_INGESTED.value in channels
        assert EventType.PRODUCTS_NORMALIZED.value in channels
        # No price increases on first receipt, so no ALERT_PRICE_INCREASE expected

        # All messages are valid EventEnvelopes
        for _, raw_msg in redis_mock._published:
            envelope = EventEnvelope.model_validate_json(raw_msg)
            assert envelope.timestamp is not None
            assert envelope.service
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Test class: Error handling for malformed scraper output
|
||||
# ===========================================================================
|
||||
|
||||
|
||||
class TestMalformedScraperOutput:
    """How normalization copes with bad, partial, or unexpected scraper data."""

    def test_missing_item_name_produces_empty_string(self):
        """An item without a description/name yields an empty product_name_raw."""
        raw_item = {"unitPrice": "2.99"}
        parsed = parse_meijer_item(raw_item)
        assert parsed.product_name_raw == ""
        assert parsed.unit_price == Decimal("2.99")

    def test_missing_price_defaults_to_zero(self):
        """An item without any price fields gets zero for both prices."""
        parsed = parse_meijer_item({"description": "Mystery Product"})
        zero = Decimal("0")
        assert parsed.unit_price == zero
        assert parsed.extended_price == zero

    def test_non_numeric_price_defaults_to_zero(self):
        """Price strings that are not numbers fall back to zero."""
        raw_item = {
            "description": "Bad Price Item",
            "unitPrice": "not_a_number",
            "extendedPrice": "$$$.xx",
        }
        parsed = parse_meijer_item(raw_item)
        zero = Decimal("0")
        assert parsed.unit_price == zero
        assert parsed.extended_price == zero

    def test_empty_receipt_produces_empty_items(self, user: User, store: Store):
        """A receipt that carries no items key normalizes to zero items."""
        payload = {"receiptId": "EMPTY-001", "date": "2026-03-15", "total": "0.00"}
        normalized = normalize_receipt(payload, user_id=str(user.id), store_id=str(store.id))

        assert normalized.receipt_id == "EMPTY-001"
        assert normalized.total == Decimal("0.00")
        assert len(normalized.items) == 0

    def test_receipt_missing_date_defaults_to_today(self, user: User, store: Store):
        """A receipt without a date field is dated today."""
        payload = {"receiptId": "NO-DATE-001", "total": "5.00", "items": []}
        normalized = normalize_receipt(payload, user_id=str(user.id), store_id=str(store.id))
        assert normalized.purchase_date == date.today()

    def test_receipt_missing_id_generates_uuid(self, user: User, store: Store):
        """A receipt without an ID is assigned a UUID string."""
        payload = {"date": "2026-03-15", "total": "10.00", "items": []}
        normalized = normalize_receipt(payload, user_id=str(user.id), store_id=str(store.id))

        # uuid.UUID raises ValueError unless the id parses as a UUID
        uuid.UUID(normalized.receipt_id)

    def test_item_with_garbage_upc_preserves_it(self):
        """A non-standard UPC value is kept as-is apart from stripping."""
        raw_item = {
            "description": "Weird UPC Product",
            "upc": " ABC-NOT-A-UPC ",
            "unitPrice": "1.99",
        }
        parsed = parse_meijer_item(raw_item)
        # lstrip("0") on "ABC-NOT-A-UPC" leaves it intact
        assert parsed.upc == "ABC-NOT-A-UPC"

    def test_negative_prices_pass_through(self):
        """Negative prices (refunds) survive parsing instead of being zeroed."""
        raw_item = {
            "description": "Refund Item",
            "unitPrice": "-5.99",
            "extendedPrice": "-5.99",
        }
        parsed = parse_meijer_item(raw_item)
        refund = Decimal("-5.99")
        assert parsed.unit_price == refund
        assert parsed.extended_price == refund

    def test_extended_price_auto_calculated(self):
        """A missing extendedPrice is derived as unitPrice * quantity."""
        raw_item = {
            "description": "No Extended",
            "unitPrice": "2.50",
            "quantity": "3",
        }
        parsed = parse_meijer_item(raw_item)
        assert parsed.extended_price == Decimal("7.50")

    def test_matching_with_malformed_items(self, e2e_session: Session):
        """ProductMatcher copes with items whose names are missing or empty."""
        matcher = ProductMatcher(e2e_session, auto_create=True)

        raw_items = (
            {"description": "", "unitPrice": "1.00"},
            {"unitPrice": "2.00"},
        )
        bad_items = [parse_meijer_item(raw) for raw in raw_items]

        outcomes = matcher.match_items(bad_items)
        assert len(outcomes) == 2
        # Both should auto-create (no match possible for empty names)
        assert all(outcome.created_new for outcome in outcomes)

    def test_completely_empty_receipt(self, user: User, store: Store):
        """An empty dict still normalizes to a purchase populated with defaults."""
        normalized = normalize_receipt({}, user_id=str(user.id), store_id=str(store.id))
        assert normalized.total == Decimal("0")
        assert len(normalized.items) == 0
        assert normalized.purchase_date == date.today()

    def test_mixed_valid_and_malformed_items(self, user: User, store: Store):
        """A receipt mixing good and bad items keeps every one of them."""
        good_item = {
            "description": "Good Product 8 oz",
            "upc": "1234567890",
            "unitPrice": "3.99",
            "extendedPrice": "3.99",
        }
        bad_price_item = {
            "unitPrice": "not_a_price",
        }
        noisy_name_item = {
            "description": " *** Special Chars !!! ",
            "unitPrice": "2.50",
        }
        payload = {
            "receiptId": "MIX-001",
            "date": "2026-03-15",
            "total": "10.00",
            "items": [good_item, bad_price_item, noisy_name_item],
        }
        normalized = normalize_receipt(payload, user_id=str(user.id), store_id=str(store.id))
        assert len(normalized.items) == 3

        good, bad_price, noisy = normalized.items[0], normalized.items[1], normalized.items[2]

        # Good item
        assert good.product_name_raw == "Good Product 8 oz"
        assert good.upc == "1234567890"

        # Bad price item
        assert bad_price.unit_price == Decimal("0")

        # Special chars stripped
        assert noisy.product_name_raw == "Special Chars"
|
||||
@@ -0,0 +1,160 @@
|
||||
"""Tests for product matching & dedup pipeline."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.constants import MatchConfidence
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from cartsnitch_common.normalization import MatchMethod
|
||||
from cartsnitch_common.pipeline.matching import (
|
||||
ProductMatcher,
|
||||
classify_confidence,
|
||||
match_purchase_item,
|
||||
)
|
||||
from cartsnitch_common.schemas.purchase import PurchaseItemCreate
|
||||
|
||||
|
||||
class TestClassifyConfidence:
    """classify_confidence results for UPC matches and the name-score bands."""

    def test_upc_always_high(self):
        """A UPC match is HIGH for every score checked."""
        for score in (1.0, 0.5):
            assert classify_confidence(score, MatchMethod.UPC) == MatchConfidence.HIGH

    def test_name_high(self):
        """Name scores of 0.9 and 0.8 classify as HIGH."""
        for score in (0.9, 0.8):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.HIGH

    def test_name_medium(self):
        """Name scores of 0.6 and 0.5 classify as MEDIUM."""
        for score in (0.6, 0.5):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.MEDIUM

    def test_name_low(self):
        """Name scores of 0.3 and 0.0 classify as LOW."""
        for score in (0.3, 0.0):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.LOW
|
||||
|
||||
|
||||
class TestProductMatcher:
    """ProductMatcher: UPC and name matching, auto-creation, and batch matching."""

    def _make_item(self, name: str, upc: str | None = None) -> PurchaseItemCreate:
        """Build a purchase item priced at 3.99 with the given raw name and optional UPC."""
        fields = {
            "product_name_raw": name,
            "upc": upc,
            "unit_price": Decimal("3.99"),
            "extended_price": Decimal("3.99"),
        }
        return PurchaseItemCreate(**fields)

    def test_match_by_upc(self, session):
        """An item sharing a stored UPC resolves to that product with HIGH confidence."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk Gallon",
            upc_variants=["041250000001"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        matcher = ProductMatcher(session)
        # The raw name differs from the canonical name; only the UPC lines up.
        probe = self._make_item("Kroger Milk", upc="041250000001")
        prod, result, confidence = matcher.match_single(probe)

        assert prod is not None
        assert prod.id == seeded.id
        assert result is not None
        assert result.method == MatchMethod.UPC
        assert confidence == MatchConfidence.HIGH

    def test_match_by_name(self, session):
        """Without a UPC, a similar name matches via the NAME method."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk Gallon",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        matcher = ProductMatcher(session, name_threshold=0.3)
        probe = self._make_item("Whole Milk Gallon Size")
        prod, result, _confidence = matcher.match_single(probe)

        assert prod is not None
        assert result is not None
        assert result.method == MatchMethod.NAME

    def test_auto_create_when_no_match(self, session):
        """With auto_create on, an unmatched item produces a new LOW-confidence product."""
        matcher = ProductMatcher(session, auto_create=True)
        probe = self._make_item("Unique Product XYZ 16 oz")
        prod, result, confidence = matcher.match_single(probe)

        assert prod is not None
        assert result is None  # No match found, was created
        assert confidence == MatchConfidence.LOW
        assert prod.canonical_name == "Unique Product XYZ 16 oz"
        # Size and unit are expected to come from the "16 oz" in the name
        assert prod.size == "16"
        assert prod.size_unit == "oz"

    def test_no_create_when_disabled(self, session):
        """With auto_create off, an unmatched item yields no product and no result."""
        matcher = ProductMatcher(session, auto_create=False)
        probe = self._make_item("Nonexistent Product")
        prod, result, _confidence = matcher.match_single(probe)

        assert prod is None
        assert result is None

    def test_batch_match(self, session):
        """match_items reports one outcome per item: one matched, one newly created."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Large Eggs 12 Count",
            upc_variants=["012345"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        matcher = ProductMatcher(session)
        known_item = self._make_item("Large Eggs", upc="012345")
        unknown_item = self._make_item("Brand New Never Seen Product")
        outcomes = matcher.match_items([known_item, unknown_item])

        assert len(outcomes) == 2
        matched, created = outcomes[0], outcomes[1]
        assert matched.match is not None
        assert matched.confidence_level == MatchConfidence.HIGH
        assert matched.created_new is False
        assert created.match is None
        assert created.created_new is True
|
||||
|
||||
|
||||
class TestMatchPurchaseItem:
    """The match_purchase_item convenience wrapper."""

    def test_convenience_function(self, session):
        """An item whose UPC is already stored comes back with HIGH confidence."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Ground Beef 80/20",
            upc_variants=["999888"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        item_fields = {
            "product_name_raw": "Ground Beef",
            "upc": "999888",
            "unit_price": Decimal("5.99"),
            "extended_price": Decimal("5.99"),
        }
        prod, confidence = match_purchase_item(session, PurchaseItemCreate(**item_fields))
        assert prod is not None
        assert confidence == MatchConfidence.HIGH

    def test_auto_create_default(self, session):
        """With nothing stored, the wrapper still returns a product, at LOW confidence."""
        item_fields = {
            "product_name_raw": "Totally New Item",
            "unit_price": Decimal("1.00"),
            "extended_price": Decimal("1.00"),
        }
        prod, confidence = match_purchase_item(session, PurchaseItemCreate(**item_fields))
        assert prod is not None
        assert confidence == MatchConfidence.LOW
|
||||
@@ -0,0 +1,282 @@
|
||||
"""Tests for price history tracking pipeline."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.constants import PriceSource, StoreSlug
|
||||
from cartsnitch_common.models.price import PriceHistory
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from cartsnitch_common.models.store import Store
|
||||
from cartsnitch_common.pipeline.price_tracking import (
|
||||
PriceDelta,
|
||||
get_latest_price,
|
||||
get_price_trend,
|
||||
record_price_from_item,
|
||||
)
|
||||
|
||||
|
||||
def _make_store(session, slug=StoreSlug.MEIJER) -> Store:
    """Create a Store named "Meijer" with the given slug, add it to the session, and flush.

    The flush sends the row to the database without committing, and the
    flushed instance is returned.
    """
    fields = {
        "id": uuid.uuid4(),
        "name": "Meijer",
        "slug": slug,
        "created_at": datetime.now(UTC),
        "updated_at": datetime.now(UTC),
    }
    new_store = Store(**fields)
    session.add(new_store)
    session.flush()
    return new_store
|
||||
|
||||
|
||||
def _make_product(session, name="Test Product") -> NormalizedProduct:
    """Create a NormalizedProduct with the given canonical name, add it, and flush.

    Returns the flushed instance; nothing is committed.
    """
    fields = {
        "id": uuid.uuid4(),
        "canonical_name": name,
        "created_at": datetime.now(UTC),
        "updated_at": datetime.now(UTC),
    }
    new_product = NormalizedProduct(**fields)
    session.add(new_product)
    session.flush()
    return new_product
|
||||
|
||||
|
||||
class TestGetLatestPrice:
    """get_latest_price with no history and with several observations."""

    def test_no_history(self, session):
        """A product/store pair with no recorded prices yields None."""
        product = _make_product(session)
        store = _make_store(session)
        assert get_latest_price(session, product.id, store.id) is None

    def test_returns_newest(self, session):
        """With two observations stored, the later-dated one is returned."""
        product = _make_product(session)
        store = _make_store(session)

        # Two entries, oldest first
        observations = [
            (date(2026, 3, 1), Decimal("3.99")),
            (date(2026, 3, 10), Decimal("4.29")),
        ]
        rows = [
            PriceHistory(
                id=uuid.uuid4(),
                normalized_product_id=product.id,
                store_id=store.id,
                observed_date=observed_on,
                regular_price=price,
                source=PriceSource.RECEIPT,
            )
            for observed_on, price in observations
        ]
        session.add_all(rows)
        session.flush()

        latest = get_latest_price(session, product.id, store.id)
        assert latest is not None
        assert latest.regular_price == Decimal("4.29")
|
||||
|
||||
|
||||
class TestRecordPriceFromItem:
    """record_price_from_item: stored entry fields and the delta it reports."""

    def test_first_price_no_delta(self, session):
        """The first observation is stored with a RECEIPT source and no delta."""
        product = _make_product(session)
        store = _make_store(session)

        recorded = record_price_from_item(
            session,
            product_id=product.id,
            store_id=store.id,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("3.99"),
        )
        entry, delta = recorded
        assert entry is not None
        assert entry.regular_price == Decimal("3.99")
        assert entry.source == PriceSource.RECEIPT
        assert delta is None

    def test_price_increase_detected(self, session):
        """A higher second price produces an increase delta of +0.50."""
        product = _make_product(session)
        store = _make_store(session)
        target = {"product_id": product.id, "store_id": store.id}

        # First price
        record_price_from_item(
            session,
            observed_date=date(2026, 3, 1),
            regular_price=Decimal("3.99"),
            **target,
        )

        # Price increase
        entry, delta = record_price_from_item(
            session,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("4.49"),
            **target,
        )

        assert delta is not None
        assert delta.old_price == Decimal("3.99")
        assert delta.new_price == Decimal("4.49")
        assert delta.change_amount == Decimal("0.50")
        assert delta.is_increase is True
        assert delta.is_decrease is False
        assert delta.change_percent > Decimal("0")

    def test_price_decrease_detected(self, session):
        """A lower second price produces a decrease delta of -1.00."""
        product = _make_product(session)
        store = _make_store(session)
        target = {"product_id": product.id, "store_id": store.id}

        record_price_from_item(
            session,
            observed_date=date(2026, 3, 1),
            regular_price=Decimal("5.00"),
            **target,
        )

        _, delta = record_price_from_item(
            session,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("4.00"),
            **target,
        )

        assert delta is not None
        assert delta.is_decrease is True
        assert delta.change_amount == Decimal("-1.00")

    def test_same_price_no_delta(self, session):
        """Recording an unchanged price on a later date reports no delta."""
        product = _make_product(session)
        store = _make_store(session)
        target = {"product_id": product.id, "store_id": store.id}
        unchanged = Decimal("3.99")

        record_price_from_item(
            session,
            observed_date=date(2026, 3, 1),
            regular_price=unchanged,
            **target,
        )

        _, delta = record_price_from_item(
            session,
            observed_date=date(2026, 3, 15),
            regular_price=unchanged,
            **target,
        )
        assert delta is None

    def test_sale_and_loyalty_prices_recorded(self, session):
        """Sale, loyalty, and coupon prices are stored on the entry as given."""
        product = _make_product(session)
        store = _make_store(session)

        optional_prices = {
            "sale_price": Decimal("4.99"),
            "loyalty_price": Decimal("4.49"),
            "coupon_price": Decimal("3.99"),
        }
        entry, _ = record_price_from_item(
            session,
            product_id=product.id,
            store_id=store.id,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("5.99"),
            **optional_prices,
        )
        assert entry.sale_price == Decimal("4.99")
        assert entry.loyalty_price == Decimal("4.49")
        assert entry.coupon_price == Decimal("3.99")

    def test_custom_source(self, session):
        """An explicit source is stored instead of the RECEIPT default."""
        product = _make_product(session)
        store = _make_store(session)

        entry, _ = record_price_from_item(
            session,
            product_id=product.id,
            store_id=store.id,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("3.99"),
            source=PriceSource.CATALOG,
        )
        assert entry.source == PriceSource.CATALOG
|
||||
|
||||
|
||||
class TestGetPriceTrend:
    """Tests for ``get_price_trend``: empty result, ordering and ``limit``."""

    def test_empty_trend(self, session):
        """A product/store pair without any price rows yields an empty list."""
        product = _make_product(session)
        store = _make_store(session)
        trend = get_price_trend(session, product.id, store.id)
        assert trend == []

    def test_returns_newest_first(self, session):
        """Rows come back ordered from the latest observed date to the earliest."""
        product = _make_product(session)
        store = _make_store(session)

        for day in [1, 5, 10, 15]:
            session.add(
                PriceHistory(
                    id=uuid.uuid4(),
                    normalized_product_id=product.id,
                    store_id=store.id,
                    observed_date=date(2026, 3, day),
                    # Build the price with exact Decimal arithmetic instead of
                    # going through binary floats (3 + day * 0.1).
                    regular_price=Decimal("3") + Decimal(day) / Decimal("10"),
                    source=PriceSource.RECEIPT,
                )
            )
        session.flush()

        trend = get_price_trend(session, product.id, store.id)
        assert len(trend) == 4
        assert trend[0].observed_date == date(2026, 3, 15)
        assert trend[-1].observed_date == date(2026, 3, 1)

    def test_respects_limit(self, session):
        """``limit`` caps the number of rows returned."""
        product = _make_product(session)
        store = _make_store(session)

        # Ten observations, one per day, all at the same price.
        for day in range(1, 11):
            session.add(
                PriceHistory(
                    id=uuid.uuid4(),
                    normalized_product_id=product.id,
                    store_id=store.id,
                    observed_date=date(2026, 3, day),
                    regular_price=Decimal("3.99"),
                    source=PriceSource.RECEIPT,
                )
            )
        session.flush()

        trend = get_price_trend(session, product.id, store.id, limit=3)
        assert len(trend) == 3
|
||||
|
||||
|
||||
class TestPriceDelta:
    """Tests for the ``is_increase`` / ``is_decrease`` flags on ``PriceDelta``."""

    def test_delta_properties(self):
        """A delta with a higher new price reports an increase, not a decrease."""
        fields = {
            "product_id": uuid.uuid4(),
            "store_id": uuid.uuid4(),
            "old_price": Decimal("3.99"),
            "new_price": Decimal("4.49"),
            "change_amount": Decimal("0.50"),
            "change_percent": Decimal("12.53"),
            "old_date": date(2026, 3, 1),
            "new_date": date(2026, 3, 15),
        }
        delta = PriceDelta(**fields)
        assert delta.is_increase is True
        assert delta.is_decrease is False

    def test_decrease_properties(self):
        """A delta with a lower new price reports a decrease, not an increase."""
        fields = {
            "product_id": uuid.uuid4(),
            "store_id": uuid.uuid4(),
            "old_price": Decimal("4.49"),
            "new_price": Decimal("3.99"),
            "change_amount": Decimal("-0.50"),
            "change_percent": Decimal("-11.14"),
            "old_date": date(2026, 3, 1),
            "new_date": date(2026, 3, 15),
        }
        delta = PriceDelta(**fields)
        assert delta.is_decrease is True
        assert delta.is_increase is False
|
||||
@@ -0,0 +1,204 @@
|
||||
"""Tests for receipt normalization pipeline."""
|
||||
|
||||
import uuid
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.pipeline.receipt import (
|
||||
_clean_product_name,
|
||||
_safe_decimal,
|
||||
normalize_receipt,
|
||||
parse_meijer_item,
|
||||
)
|
||||
|
||||
|
||||
class TestCleanProductName:
    """Tests for ``_clean_product_name`` whitespace and punctuation cleanup."""

    def test_strips_whitespace(self):
        """Leading and trailing spaces are removed."""
        assert _clean_product_name(" Milk ") == "Milk"

    def test_removes_leading_punctuation(self):
        """Dashes wrapped around the name are removed."""
        assert _clean_product_name("---Milk---") == "Milk"

    def test_collapses_internal_whitespace(self):
        """Runs of spaces inside the name are reduced to a single space."""
        # The input must actually contain repeated spaces; with single spaces
        # the assertion would pass even if nothing were collapsed.
        assert _clean_product_name("Whole   Milk    Gallon") == "Whole Milk Gallon"

    def test_empty_string(self):
        """An empty name stays empty."""
        assert _clean_product_name("") == ""
|
||||
|
||||
|
||||
class TestSafeDecimal:
    """Tests for ``_safe_decimal`` conversions and fallback values."""

    def test_string_input(self):
        """A numeric string converts to the matching Decimal."""
        converted = _safe_decimal("3.99")
        assert converted == Decimal("3.99")

    def test_float_input(self):
        """A float converts to the Decimal of its short decimal form."""
        converted = _safe_decimal(3.99)
        assert converted == Decimal("3.99")

    def test_int_input(self):
        """An int converts to the matching Decimal."""
        converted = _safe_decimal(4)
        assert converted == Decimal("4")

    def test_none_returns_default(self):
        """``None`` without an explicit default yields zero."""
        converted = _safe_decimal(None)
        assert converted == Decimal("0")

    def test_none_custom_default(self):
        """``None`` with an explicit default yields that default."""
        fallback = Decimal("1")
        assert _safe_decimal(None, fallback) == Decimal("1")

    def test_invalid_returns_default(self):
        """An unparseable string yields zero."""
        converted = _safe_decimal("not-a-number")
        assert converted == Decimal("0")

    def test_decimal_passthrough(self):
        """A Decimal input comes back with an equal value."""
        original = Decimal("5.50")
        assert _safe_decimal(original) == Decimal("5.50")
|
||||
|
||||
|
||||
class TestParseMeijerItem:
    """Tests for ``parse_meijer_item`` field mapping, fallbacks and defaults."""

    def test_basic_item(self):
        """The primary field names are mapped onto the parsed item."""
        item = parse_meijer_item(
            {
                "description": "Kroger Whole Milk 1 Gallon",
                "upc": "0041250000001",
                "quantity": 1,
                "unitPrice": "3.99",
                "extendedPrice": "3.99",
                "category": "DAIRY",
            }
        )
        assert item.product_name_raw == "Kroger Whole Milk 1 Gallon"
        assert item.upc == "41250000001"  # leading zeros stripped
        assert item.quantity == Decimal("1")
        assert item.unit_price == Decimal("3.99")
        assert item.extended_price == Decimal("3.99")
        assert item.category_raw == "DAIRY"

    def test_alternate_field_names(self):
        """The alternate field spellings are accepted as well."""
        item = parse_meijer_item(
            {
                "name": "Eggs Large 12 ct",
                "upcCode": "012345",
                "qty": 2,
                "price": "4.50",
                "totalPrice": "9.00",
                "department": "EGGS",
            }
        )
        assert item.product_name_raw == "Eggs Large 12 ct"
        assert item.upc == "12345"
        assert item.quantity == Decimal("2")
        assert item.unit_price == Decimal("4.50")
        assert item.extended_price == Decimal("9.00")
        assert item.category_raw == "EGGS"

    def test_calculates_extended_from_unit_price(self):
        """Without an extended price, it equals unit price times quantity."""
        payload = {
            "description": "Bananas",
            "unitPrice": "0.59",
            "quantity": 3,
        }
        parsed = parse_meijer_item(payload)
        assert parsed.extended_price == Decimal("1.77")

    def test_discounts_parsed(self):
        """Regular/sale prices and coupon/loyalty amounts are parsed."""
        item = parse_meijer_item(
            {
                "description": "Cereal",
                "unitPrice": "4.99",
                "extendedPrice": "4.99",
                "regularPrice": "5.99",
                "salePrice": "4.99",
                "couponAmount": "1.00",
                "loyaltyAmount": "0.50",
            }
        )
        assert item.regular_price == Decimal("5.99")
        assert item.sale_price == Decimal("4.99")
        assert item.coupon_discount == Decimal("1.00")
        assert item.loyalty_discount == Decimal("0.50")

    def test_alternate_discount_names(self):
        """``couponDiscount`` / ``loyaltyDiscount`` are accepted for the discounts."""
        item = parse_meijer_item(
            {
                "description": "Bread",
                "unitPrice": "2.99",
                "extendedPrice": "2.99",
                "couponDiscount": "0.75",
                "loyaltyDiscount": "0.25",
            }
        )
        assert item.coupon_discount == Decimal("0.75")
        assert item.loyalty_discount == Decimal("0.25")

    def test_missing_fields_default_gracefully(self):
        """A payload with only a description falls back to defaults."""
        parsed = parse_meijer_item({"description": "Mystery Item"})
        assert parsed.product_name_raw == "Mystery Item"
        assert parsed.upc is None
        assert parsed.quantity == Decimal("1")
        assert parsed.unit_price == Decimal("0")
        assert parsed.regular_price is None
        assert parsed.category_raw is None

    def test_no_upc_returns_none(self):
        """A payload without any UPC field leaves ``upc`` as ``None``."""
        payload = {"description": "Loose Bananas", "unitPrice": "1.00", "extendedPrice": "1.00"}
        parsed = parse_meijer_item(payload)
        assert parsed.upc is None
|
||||
|
||||
|
||||
class TestNormalizeReceipt:
    """Tests for ``normalize_receipt`` field mapping, fallbacks and defaults."""

    def test_full_receipt(self):
        """All primary receipt fields and the line items are carried over."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {
            "receiptId": "REC-001",
            "date": "2026-03-15",
            "total": "25.47",
            "subtotal": "23.00",
            "tax": "2.47",
            "savings": "3.00",
            "items": [
                {"description": "Milk", "unitPrice": "3.99", "extendedPrice": "3.99"},
                {"description": "Bread", "unitPrice": "2.50", "extendedPrice": "2.50"},
            ],
        }
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id == "REC-001"
        assert purchase.purchase_date == date(2026, 3, 15)
        assert purchase.total == Decimal("25.47")
        assert purchase.subtotal == Decimal("23.00")
        assert purchase.tax == Decimal("2.47")
        assert purchase.savings_total == Decimal("3.00")
        assert len(purchase.items) == 2
        assert purchase.items[0].product_name_raw == "Milk"
        # The untouched input payload is kept alongside the normalized fields.
        assert purchase.raw_data == raw

    def test_alternate_receipt_fields(self):
        """The alternate receipt field spellings are accepted as well."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {
            "receipt_id": "REC-002",
            "purchaseDate": "2026-03-14",
            "totalAmount": "10.00",
            "taxAmount": "0.75",
            "totalSavings": "1.50",
            "items": [],
        }
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id == "REC-002"
        assert purchase.purchase_date == date(2026, 3, 14)
        assert purchase.total == Decimal("10.00")
        assert purchase.tax == Decimal("0.75")
        assert purchase.savings_total == Decimal("1.50")

    def test_missing_date_defaults_to_today(self):
        """A receipt without a date gets the current date."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {"total": "5.00", "items": []}
        # Bracket the call so the test cannot fail when the calendar day rolls
        # over between normalization and the assertion.
        earliest = date.today()
        purchase = normalize_receipt(raw, user_id, store_id)
        latest = date.today()
        assert earliest <= purchase.purchase_date <= latest

    def test_generates_receipt_id_if_missing(self):
        """A receipt without an id still ends up with a non-empty one."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {"total": "5.00", "date": "2026-03-15", "items": []}
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id  # Should be a generated UUID string

    def test_date_object_passthrough(self):
        """A ``date`` instance in the payload is used as the purchase date."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {"date": date(2026, 1, 1), "total": "5.00", "items": []}
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.purchase_date == date(2026, 1, 1)
|
||||
@@ -0,0 +1,233 @@
|
||||
"""Tests for shrinkflation detection pipeline."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.constants import SizeUnit
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from cartsnitch_common.pipeline.shrinkflation import (
|
||||
_to_comparable,
|
||||
_units_comparable,
|
||||
detect_shrinkflation,
|
||||
)
|
||||
|
||||
|
||||
class TestToComparable:
    """Tests for ``_to_comparable`` size conversion."""

    def test_oz_to_grams(self):
        """Ounces are scaled by 28.3495."""
        converted = _to_comparable("16", SizeUnit.OZ)
        assert converted is not None
        assert converted == Decimal("16") * Decimal("28.3495")

    def test_lb_to_grams(self):
        """One pound converts to 453.592."""
        converted = _to_comparable("1", SizeUnit.LB)
        assert converted == Decimal("453.592")

    def test_ml_to_ml(self):
        """Millilitres are returned unchanged."""
        converted = _to_comparable("500", SizeUnit.ML)
        assert converted == Decimal("500")

    def test_fl_oz_to_ml(self):
        """Fluid ounces are scaled by 29.5735."""
        converted = _to_comparable("12", SizeUnit.FL_OZ)
        assert converted is not None
        assert converted == Decimal("12") * Decimal("29.5735")

    def test_count_units(self):
        """Count and pack sizes are returned unchanged."""
        count_value = _to_comparable("12", SizeUnit.CT)
        assert count_value == Decimal("12")
        pack_value = _to_comparable("6", SizeUnit.PK)
        assert pack_value == Decimal("6")

    def test_invalid_size(self):
        """A non-numeric size yields ``None``."""
        converted = _to_comparable("abc", SizeUnit.OZ)
        assert converted is None
|
||||
|
||||
|
||||
class TestUnitsComparable:
    """Tests for ``_units_comparable`` across weight, volume and count units."""

    def test_weight_comparable(self):
        """Weight units can be compared with each other."""
        oz_vs_lb = _units_comparable(SizeUnit.OZ, SizeUnit.LB)
        assert oz_vs_lb is True
        g_vs_kg = _units_comparable(SizeUnit.G, SizeUnit.KG)
        assert g_vs_kg is True

    def test_volume_comparable(self):
        """Volume units can be compared with each other."""
        ml_vs_l = _units_comparable(SizeUnit.ML, SizeUnit.L)
        assert ml_vs_l is True
        floz_vs_ml = _units_comparable(SizeUnit.FL_OZ, SizeUnit.ML)
        assert floz_vs_ml is True

    def test_count_comparable(self):
        """Count and pack units can be compared with each other."""
        ct_vs_pk = _units_comparable(SizeUnit.CT, SizeUnit.PK)
        assert ct_vs_pk is True

    def test_not_comparable_across_systems(self):
        """Units from different groups cannot be compared."""
        oz_vs_ml = _units_comparable(SizeUnit.OZ, SizeUnit.ML)
        assert oz_vs_ml is False
        ct_vs_oz = _units_comparable(SizeUnit.CT, SizeUnit.OZ)
        assert ct_vs_oz is False
        lb_vs_l = _units_comparable(SizeUnit.LB, SizeUnit.L)
        assert lb_vs_l is False
|
||||
|
||||
|
||||
class TestDetectShrinkflation:
    """Tests for ``detect_shrinkflation`` against products stored in the session."""

    def _make_product(self, session, size: str, unit: SizeUnit, name: str = "Test Product"):
        """Persist and return a ``NormalizedProduct`` with the given size and unit."""
        attrs = {
            "id": uuid.uuid4(),
            "canonical_name": name,
            "size": size,
            "size_unit": unit,
            "created_at": datetime.now(UTC),
            "updated_at": datetime.now(UTC),
        }
        product = NormalizedProduct(**attrs)
        session.add(product)
        session.flush()
        return product

    def test_detects_oz_decrease(self, session):
        """Going from 16 oz to 14 oz produces an event describing the decrease."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="14",
            new_unit=SizeUnit.OZ,
            detected_date=date(2026, 3, 15),
        )
        assert event is not None
        assert event.old_size == "16"
        assert event.new_size == "14"
        assert "decreased" in event.notes.lower()

    def test_no_detection_when_size_increases(self, session):
        """A larger new size produces no event."""
        product = self._make_product(session, "14", SizeUnit.OZ)
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="16",
            new_unit=SizeUnit.OZ,
        )
        assert event is None

    def test_no_detection_same_size(self, session):
        """An unchanged size produces no event."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="16",
            new_unit=SizeUnit.OZ,
        )
        assert event is None

    def test_no_detection_incompatible_units(self, session):
        """An ounce-sized product compared against millilitres produces no event."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="400",
            new_unit=SizeUnit.ML,
        )
        assert event is None

    def test_no_detection_without_existing_size(self, session):
        """A product stored without size information produces no event."""
        sizeless = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="No Size Product",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(sizeless)
        session.flush()

        event = detect_shrinkflation(
            session,
            product=sizeless,
            new_size="12",
            new_unit=SizeUnit.OZ,
        )
        assert event is None

    def test_cross_unit_detection_same_system(self, session):
        """A decrease is detected across different weight units."""
        # 1 lb = 453.592g, 14 oz = 396.893g → size decreased
        product = self._make_product(session, "1", SizeUnit.LB)
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="14",
            new_unit=SizeUnit.OZ,
            detected_date=date(2026, 3, 15),
        )
        assert event is not None

    def test_count_decrease(self, session):
        """Going from 12 ct to 10 ct produces an event with both sizes."""
        product = self._make_product(session, "12", SizeUnit.CT)
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="10",
            new_unit=SizeUnit.CT,
            detected_date=date(2026, 3, 15),
        )
        assert event is not None
        assert event.old_size == "12"
        assert event.new_size == "10"

    def test_dedup_existing_event(self, session):
        """Detecting the same change again returns the already stored event."""
        product = self._make_product(session, "16", SizeUnit.OZ)

        # First detection
        first = detect_shrinkflation(
            session,
            product=product,
            new_size="14",
            new_unit=SizeUnit.OZ,
            detected_date=date(2026, 3, 15),
        )

        # Same detection again — should return existing
        second = detect_shrinkflation(
            session,
            product=product,
            new_size="14",
            new_unit=SizeUnit.OZ,
            detected_date=date(2026, 3, 16),
        )

        assert first is not None
        assert second is not None
        assert first.id == second.id

    def test_confidence_scaling(self, session):
        """Larger relative decreases are reported with higher confidence."""
        scenarios = [
            # Small decrease (< 5%) → 0.70
            ("Product A", "97", Decimal("0.70")),
            # Medium decrease (5-10%) → 0.85
            ("Product B", "93", Decimal("0.85")),
            # Large decrease (>= 10%) → 0.95
            ("Product C", "85", Decimal("0.95")),
        ]
        for name, shrunk_size, expected_confidence in scenarios:
            product = self._make_product(session, "100", SizeUnit.G, name)
            event = detect_shrinkflation(
                session,
                product=product,
                new_size=shrunk_size,
                new_unit=SizeUnit.G,
                detected_date=date(2026, 3, 15),
            )
            assert event is not None
            assert event.confidence == expected_confidence

    def test_min_size_decrease_threshold(self, session):
        """A decrease smaller than ``min_size_decrease_pct`` produces no event."""
        product = self._make_product(session, "100", SizeUnit.G)
        # 0.5% decrease — below default 1% threshold
        event = detect_shrinkflation(
            session,
            product=product,
            new_size="99.5",
            new_unit=SizeUnit.G,
            min_size_decrease_pct=Decimal("1"),
        )
        assert event is None
|
||||
@@ -0,0 +1,225 @@
|
||||
"""Tests for Pydantic v2 schemas."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime
|
||||
from decimal import Decimal
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from cartsnitch_common.constants import (
|
||||
AccountStatus,
|
||||
DiscountType,
|
||||
EventType,
|
||||
PriceSource,
|
||||
ProductCategory,
|
||||
SizeUnit,
|
||||
StoreSlug,
|
||||
)
|
||||
from cartsnitch_common.schemas import (
|
||||
CouponCreate,
|
||||
EventEnvelope,
|
||||
NormalizedProductCreate,
|
||||
PriceHistoryCreate,
|
||||
PurchaseCreate,
|
||||
PurchaseItemCreate,
|
||||
ShrinkflationEventCreate,
|
||||
StoreCreate,
|
||||
StoreLocationCreate,
|
||||
StoreRead,
|
||||
UserCreate,
|
||||
UserStoreAccountCreate,
|
||||
)
|
||||
|
||||
|
||||
class TestStoreSchemas:
    """Tests for the ``StoreCreate`` and ``StoreRead`` schemas."""

    def test_store_create_valid(self):
        """A known slug is accepted and kept on the model."""
        store = StoreCreate(name="Meijer", slug=StoreSlug.MEIJER)
        assert store.slug == StoreSlug.MEIJER

    def test_store_create_invalid_slug(self):
        """The slug ``"walmart"`` is rejected with a validation error."""
        with pytest.raises(ValidationError):
            StoreCreate(name="Walmart", slug="walmart")

    def test_store_read_from_attributes(self):
        """``StoreRead`` can be built from a complete set of field values."""
        # NOTE(review): despite the test name, the model is built from keyword
        # arguments here, not from an attribute-bearing object — confirm intent.
        now_fields = {
            "id": uuid.uuid4(),
            "name": "Kroger",
            "slug": StoreSlug.KROGER,
            "logo_url": None,
            "website_url": None,
            "created_at": datetime.now(UTC),
            "updated_at": datetime.now(UTC),
        }
        store = StoreRead(**now_fields)
        assert store.slug == StoreSlug.KROGER
|
||||
|
||||
|
||||
class TestStoreLocationSchemas:
    """Tests for the ``StoreLocationCreate`` schema."""

    def test_location_create(self):
        """A full address is accepted and the city is kept."""
        address_fields = {
            "store_id": uuid.uuid4(),
            "address": "456 Oak Ave",
            "city": "Detroit",
            "state": "MI",
            "zip": "48201",
        }
        location = StoreLocationCreate(**address_fields)
        assert location.city == "Detroit"
|
||||
|
||||
|
||||
class TestUserSchemas:
    """Tests for the ``UserCreate`` schema."""

    def test_user_create_valid(self):
        """A well-formed email is accepted and kept."""
        user = UserCreate(email="test@example.com", password="secret123")
        assert user.email == "test@example.com"

    def test_user_create_invalid_email(self):
        """A string that is not an email address is rejected."""
        with pytest.raises(ValidationError):
            UserCreate(email="not-an-email", password="secret123")
|
||||
|
||||
|
||||
class TestUserStoreAccountSchemas:
    """Tests for the ``UserStoreAccountCreate`` schema."""

    def test_account_create_with_status(self):
        """An explicitly given status is kept."""
        account = UserStoreAccountCreate(
            user_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            status=AccountStatus.EXPIRED,
        )
        assert account.status == AccountStatus.EXPIRED

    def test_account_create_default_status(self):
        """Without a status the account is created as active."""
        account = UserStoreAccountCreate(
            user_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
        )
        assert account.status == AccountStatus.ACTIVE

    def test_account_create_invalid_status(self):
        """An unknown status string is rejected."""
        with pytest.raises(ValidationError):
            UserStoreAccountCreate(
                user_id=uuid.uuid4(),
                store_id=uuid.uuid4(),
                status="invalid_status",
            )
|
||||
|
||||
|
||||
class TestPurchaseSchemas:
    """Tests for the ``PurchaseCreate`` and ``PurchaseItemCreate`` schemas."""

    def test_purchase_create_with_items(self):
        """A purchase keeps its nested items; item quantity defaults to 1."""
        purchase = PurchaseCreate(
            user_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            receipt_id="RCP-001",
            purchase_date=date(2026, 3, 15),
            total=Decimal("42.50"),
            items=[
                PurchaseItemCreate(
                    product_name_raw="Milk",
                    unit_price=Decimal("3.49"),
                    extended_price=Decimal("3.49"),
                ),
            ],
        )
        assert len(purchase.items) == 1
        only_item = purchase.items[0]
        assert only_item.quantity == Decimal("1")
|
||||
|
||||
|
||||
class TestNormalizedProductSchemas:
    """Tests for the ``NormalizedProductCreate`` schema."""

    def test_product_create_with_enums(self):
        """Enum-valued category and size unit are accepted."""
        product = NormalizedProductCreate(
            canonical_name="Whole Milk, 1 Gallon",
            category=ProductCategory.DAIRY,
            size_unit=SizeUnit.FL_OZ,
            upc_variants=["0041250000001"],
        )
        assert product.category == ProductCategory.DAIRY

    def test_product_create_invalid_category(self):
        """An unknown category string is rejected."""
        with pytest.raises(ValidationError):
            NormalizedProductCreate(
                canonical_name="Test",
                category="invalid_category",
            )
|
||||
|
||||
|
||||
class TestPriceHistorySchemas:
    """Tests for the ``PriceHistoryCreate`` schema."""

    def test_price_create(self):
        """A valid price observation keeps its source."""
        observation = PriceHistoryCreate(
            normalized_product_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("4.99"),
            source=PriceSource.RECEIPT,
        )
        assert observation.source == PriceSource.RECEIPT

    def test_price_create_invalid_source(self):
        """An unknown source string is rejected."""
        with pytest.raises(ValidationError):
            PriceHistoryCreate(
                normalized_product_id=uuid.uuid4(),
                store_id=uuid.uuid4(),
                observed_date=date(2026, 3, 15),
                regular_price=Decimal("4.99"),
                source="invalid_source",
            )
|
||||
|
||||
|
||||
class TestCouponSchemas:
    """Tests for the ``CouponCreate`` schema."""

    def test_coupon_create(self):
        """A valid coupon keeps its discount type."""
        coupon = CouponCreate(
            store_id=uuid.uuid4(),
            title="BOGO Chips",
            discount_type=DiscountType.BOGO,
        )
        assert coupon.discount_type == DiscountType.BOGO

    def test_coupon_create_invalid_discount_type(self):
        """An unknown discount type string is rejected."""
        with pytest.raises(ValidationError):
            CouponCreate(
                store_id=uuid.uuid4(),
                title="Test",
                discount_type="free_stuff",
            )
|
||||
|
||||
|
||||
class TestShrinkflationEventSchemas:
    """Tests for the ``ShrinkflationEventCreate`` schema."""

    def test_shrinkflation_create(self):
        """A valid event keeps its old size unit."""
        event = ShrinkflationEventCreate(
            normalized_product_id=uuid.uuid4(),
            detected_date=date(2026, 3, 10),
            old_size="18",
            new_size="15.4",
            old_unit=SizeUnit.OZ,
            new_unit=SizeUnit.OZ,
            confidence=Decimal("0.95"),
        )
        assert event.old_unit == SizeUnit.OZ

    def test_shrinkflation_create_invalid_unit(self):
        """An unknown unit string is rejected."""
        with pytest.raises(ValidationError):
            ShrinkflationEventCreate(
                normalized_product_id=uuid.uuid4(),
                detected_date=date(2026, 3, 10),
                old_size="18",
                new_size="15.4",
                old_unit="bushels",
                new_unit=SizeUnit.OZ,
            )
|
||||
|
||||
|
||||
class TestEventEnvelope:
    """Tests for the ``EventEnvelope`` schema."""

    def test_valid_event(self):
        """A known event type is accepted and kept."""
        envelope = EventEnvelope(
            event_type=EventType.RECEIPTS_INGESTED,
            timestamp=datetime.now(UTC),
            service="receiptwitness",
            payload={"receipt_id": "RCP-001"},
        )
        assert envelope.event_type == EventType.RECEIPTS_INGESTED

    def test_invalid_event_type(self):
        """An unknown event type string is rejected."""
        with pytest.raises(ValidationError):
            EventEnvelope(
                event_type="invalid.event",
                timestamp=datetime.now(UTC),
                service="test",
                payload={},
            )
|
||||
@@ -0,0 +1,357 @@
|
||||
"""Tests for the seed data generator."""
|
||||
|
||||
import random
|
||||
|
||||
from faker import Faker
|
||||
|
||||
from cartsnitch_common.seed.config import (
|
||||
NUM_ACTIVE_USERS,
|
||||
NUM_COUPONS,
|
||||
NUM_PRICE_HISTORY,
|
||||
NUM_PRODUCTS,
|
||||
NUM_PURCHASE_ITEMS,
|
||||
NUM_PURCHASES,
|
||||
NUM_SHRINKFLATION_EVENTS,
|
||||
NUM_STORES,
|
||||
NUM_USERS,
|
||||
SEED_END_DATE,
|
||||
SEED_START_DATE,
|
||||
SEED_VALUE,
|
||||
)
|
||||
from cartsnitch_common.seed.generators.coupons import generate_coupons
|
||||
from cartsnitch_common.seed.generators.prices import generate_price_history
|
||||
from cartsnitch_common.seed.generators.products import generate_products
|
||||
from cartsnitch_common.seed.generators.purchases import generate_purchase_items, generate_purchases
|
||||
from cartsnitch_common.seed.generators.shrinkflation import generate_shrinkflation_events
|
||||
from cartsnitch_common.seed.generators.stores import generate_store_locations, generate_stores
|
||||
from cartsnitch_common.seed.generators.users import generate_users
|
||||
|
||||
|
||||
def _seed() -> None:
    """Reset the ``random`` module and Faker's shared seed to ``SEED_VALUE``."""
    seed_value = SEED_VALUE
    random.seed(seed_value)
    Faker.seed(seed_value)
|
||||
|
||||
|
||||
def _make_fake() -> Faker:
    """Return a new default-configured ``Faker`` instance."""
    fake = Faker()
    return fake
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Stores
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_stores_count() -> None:
    """``generate_stores`` returns exactly ``NUM_STORES`` entries."""
    generated = generate_stores()
    assert len(generated) == NUM_STORES
|
||||
|
||||
|
||||
def test_generate_stores_deterministic() -> None:
    """Two calls to ``generate_stores`` yield the same set of slugs."""
    first_run = generate_stores()
    second_run = generate_stores()
    # Stores are fixed (no RNG), so slugs are stable
    first_slugs = {store["slug"] for store in first_run}
    second_slugs = {store["slug"] for store in second_run}
    assert first_slugs == second_slugs
|
||||
|
||||
|
||||
def test_generate_store_locations_count() -> None:
    """Fifteen locations are generated for the generated stores."""
    locations = generate_store_locations(generate_stores())
    assert len(locations) == 15  # 3 stores * 5 locations
|
||||
|
||||
|
||||
def test_generate_store_locations_fk() -> None:
    """Every location's ``store_id`` refers to one of the generated stores."""
    stores = generate_stores()
    locations = generate_store_locations(stores)
    known_store_ids = {store["id"] for store in stores}
    for location in locations:
        assert location["store_id"] in known_store_ids
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Users
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_users_count() -> None:
    """``generate_users`` returns exactly ``NUM_USERS`` entries."""
    _seed()
    generated = generate_users(_make_fake())
    assert len(generated) == NUM_USERS
|
||||
|
||||
|
||||
def test_generate_users_active_count() -> None:
    """Exactly ``NUM_ACTIVE_USERS`` users carry a truthy ``_active`` flag."""
    _seed()
    generated = generate_users(_make_fake())
    active_users = []
    for user in generated:
        if user["_active"]:
            active_users.append(user)
    assert len(active_users) == NUM_ACTIVE_USERS
|
||||
|
||||
|
||||
def test_generate_users_deterministic() -> None:
    """Re-seeding before each run reproduces the same email sequence."""
    email_runs = []
    for _ in range(2):
        _seed()
        faker = _make_fake()
        users = generate_users(faker)
        email_runs.append([user["email"] for user in users])

    # Emails should match (same seed → same Faker output)
    first_emails, second_emails = email_runs
    assert first_emails == second_emails
|
||||
|
||||
|
||||
def test_generate_users_unique_emails() -> None:
    """No two generated users share an email address."""
    _seed()
    generated = generate_users(_make_fake())
    all_emails = [user["email"] for user in generated]
    distinct_emails = set(all_emails)
    assert len(all_emails) == len(distinct_emails)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Products
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_products_count() -> None:
    """``generate_products`` returns exactly ``NUM_PRODUCTS`` entries."""
    _seed()
    generated = generate_products(_make_fake())
    assert len(generated) == NUM_PRODUCTS
|
||||
|
||||
|
||||
def test_generate_products_deterministic() -> None:
    """Re-seeding before each run reproduces the same canonical names."""
    name_runs = []
    for _ in range(2):
        _seed()
        faker = _make_fake()
        products = generate_products(faker)
        name_runs.append([product["canonical_name"] for product in products])

    first_names, second_names = name_runs
    assert first_names == second_names
|
||||
|
||||
|
||||
def test_generate_products_have_categories() -> None:
    """Every generated product has a non-``None`` category."""
    _seed()
    generated = generate_products(_make_fake())
    for entry in generated:
        assert entry["category"] is not None
|
||||
|
||||
|
||||
def test_generate_products_have_upc_variants() -> None:
    """Every generated product carries a non-empty list of UPC variants."""
    _seed()
    generated = generate_products(_make_fake())
    for entry in generated:
        variants = entry["upc_variants"]
        assert variants
        assert isinstance(variants, list)
        assert len(variants) >= 1
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Purchases
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_purchases_count() -> None:
    """``generate_purchases`` returns exactly ``NUM_PURCHASES`` entries."""
    _seed()
    faker = _make_fake()
    stores = generate_stores()
    locations = generate_store_locations(stores)
    users = generate_users(faker)
    generated = generate_purchases(users, stores, locations)
    assert len(generated) == NUM_PURCHASES
|
||||
|
||||
|
||||
def test_generate_purchases_fk() -> None:
    """Every purchase refers to a generated user and a generated store."""
    _seed()
    faker = _make_fake()
    stores = generate_stores()
    locations = generate_store_locations(stores)
    users = generate_users(faker)
    purchases = generate_purchases(users, stores, locations)

    known_user_ids = {user["id"] for user in users}
    known_store_ids = {store["id"] for store in stores}
    for purchase in purchases:
        assert purchase["user_id"] in known_user_ids
        assert purchase["store_id"] in known_store_ids
|
||||
|
||||
|
||||
def test_generate_purchase_items_count() -> None:
    """The number of purchase items lands within 20% of ``NUM_PURCHASE_ITEMS``."""
    _seed()
    faker = _make_fake()
    stores = generate_stores()
    locations = generate_store_locations(stores)
    users = generate_users(faker)
    purchases = generate_purchases(users, stores, locations)
    products = generate_products(faker)
    items = generate_purchase_items(purchases, products)
    # Should be close to target (within 20%)
    deviation = abs(len(items) - NUM_PURCHASE_ITEMS)
    assert deviation < NUM_PURCHASE_ITEMS * 0.20
|
||||
|
||||
|
||||
def test_generate_purchase_items_fk() -> None:
    """Every purchase item refers to a generated purchase and product."""
    _seed()
    faker = _make_fake()
    stores = generate_stores()
    locations = generate_store_locations(stores)
    users = generate_users(faker)
    purchases = generate_purchases(users, stores, locations)
    products = generate_products(faker)
    items = generate_purchase_items(purchases, products)

    known_purchase_ids = {purchase["id"] for purchase in purchases}
    known_product_ids = {product["id"] for product in products}
    for item in items:
        assert item["purchase_id"] in known_purchase_ids
        assert item["normalized_product_id"] in known_product_ids
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Price History
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_price_history_count() -> None:
    """The number of price rows lands within 10% of ``NUM_PRICE_HISTORY``."""
    _seed()
    faker = _make_fake()
    stores = generate_stores()
    locations = generate_store_locations(stores)
    users = generate_users(faker)
    purchases = generate_purchases(users, stores, locations)
    products = generate_products(faker)
    items = generate_purchase_items(purchases, products)
    history = generate_price_history(products, stores, items)
    # Should be within 10% of target
    deviation = abs(len(history) - NUM_PRICE_HISTORY)
    assert deviation < NUM_PRICE_HISTORY * 0.10
|
||||
|
||||
|
||||
def test_generate_price_history_fk() -> None:
    """Price rows must reference known products/stores and carry a positive price."""
    _seed()
    faker = _make_fake()
    # Generator call order is kept fixed so the seeded run stays reproducible.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    line_items = generate_purchase_items(purchase_rows, product_rows)
    observations = generate_price_history(product_rows, store_rows, line_items)

    known_product_ids = {row["id"] for row in product_rows}
    known_store_ids = {row["id"] for row in store_rows}
    for observation in observations:
        assert observation["normalized_product_id"] in known_product_ids
        assert observation["store_id"] in known_store_ids
        assert observation["regular_price"] > 0
|
||||
|
||||
|
||||
def test_price_history_dates_in_range() -> None:
    """Each observed date must fall inside ``[SEED_START_DATE, SEED_END_DATE]``."""
    _seed()
    faker = _make_fake()
    # Generator call order is kept fixed so the seeded run stays reproducible.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    line_items = generate_purchase_items(purchase_rows, product_rows)
    observations = generate_price_history(product_rows, store_rows, line_items)

    for observation in observations:
        observed_on = observation["observed_date"]
        assert SEED_START_DATE <= observed_on <= SEED_END_DATE
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Coupons
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_coupons_count() -> None:
    """The coupon generator must emit exactly ``NUM_COUPONS`` rows."""
    _seed()
    faker = _make_fake()
    store_rows = generate_stores()
    product_rows = generate_products(faker)
    coupon_rows = generate_coupons(faker, product_rows, store_rows)

    produced = len(coupon_rows)
    assert produced == NUM_COUPONS
|
||||
|
||||
|
||||
def test_generate_coupons_mix() -> None:
    """Check the expired/active split is roughly 60/40.

    A coupon counts as expired when its ``valid_to`` is before
    ``SEED_END_DATE`` and as active otherwise.
    """
    _seed()
    faker = _make_fake()
    store_rows = generate_stores()
    product_rows = generate_products(faker)
    coupon_rows = generate_coupons(faker, product_rows, store_rows)

    expired_count = len(
        [coupon for coupon in coupon_rows if coupon["valid_to"] < SEED_END_DATE]
    )
    active_count = len(
        [coupon for coupon in coupon_rows if coupon["valid_to"] >= SEED_END_DATE]
    )

    # Allow ±15% variance from target
    expired_share = expired_count / NUM_COUPONS
    assert expired_share > 0.45
    active_share = active_count / NUM_COUPONS
    assert active_share > 0.25
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Shrinkflation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_generate_shrinkflation_count() -> None:
    """The generator must emit exactly ``NUM_SHRINKFLATION_EVENTS`` events."""
    _seed()
    faker = _make_fake()
    product_rows = generate_products(faker)
    shrink_events = generate_shrinkflation_events(product_rows)

    produced = len(shrink_events)
    assert produced == NUM_SHRINKFLATION_EVENTS
|
||||
|
||||
|
||||
def test_generate_shrinkflation_fk() -> None:
    """Every shrinkflation event must reference a generated product."""
    _seed()
    faker = _make_fake()
    product_rows = generate_products(faker)
    shrink_events = generate_shrinkflation_events(product_rows)

    known_product_ids = {row["id"] for row in product_rows}
    for shrink_event in shrink_events:
        assert shrink_event["normalized_product_id"] in known_product_ids
|
||||
|
||||
|
||||
def test_generate_shrinkflation_price_held_or_increased() -> None:
    """Check each event shrinks the size while the price holds or rises.

    The price comparison only runs when both price fields are truthy, and
    it tolerates the new price being up to 5% below the old one.
    """
    _seed()
    faker = _make_fake()
    product_rows = generate_products(faker)
    shrink_events = generate_shrinkflation_events(product_rows)

    for shrink_event in shrink_events:
        old_size = float(shrink_event["old_size"])
        new_size = float(shrink_event["new_size"])
        assert new_size < old_size, f"Expected size reduction: {old_size} -> {new_size}"

        # Skip the price check when either price is missing or falsy.
        if not shrink_event["price_at_old_size"]:
            continue
        if not shrink_event["price_at_new_size"]:
            continue

        # Price should be maintained or increased (not significantly dropped)
        assert (
            float(shrink_event["price_at_new_size"])
            >= float(shrink_event["price_at_old_size"]) * 0.95
        )
|
||||
|
||||
|
||||
def test_generate_shrinkflation_confidence_range() -> None:
    """Every event's confidence, read as a float, must lie in ``[0, 1]``."""
    _seed()
    faker = _make_fake()
    product_rows = generate_products(faker)
    shrink_events = generate_shrinkflation_events(product_rows)

    for shrink_event in shrink_events:
        confidence = float(shrink_event["confidence"])
        assert 0 <= confidence <= 1.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dry-run smoke test
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_dry_run_does_not_raise() -> None:
    """Run the whole seed pipeline in dry-run mode and expect no exception."""
    # Imported inside the test, as in the original, so the runner module is
    # only loaded when this smoke test executes.
    from cartsnitch_common.seed import runner

    runner.run_seed(dry_run=True, seed_value=SEED_VALUE)
|
||||
Reference in New Issue
Block a user