Merge commit '4cf6f91e954b770198578bcb8db5d98ac964bfed' as 'common'

This commit is contained in:
Coupon Carl
2026-03-28 02:24:14 +00:00
66 changed files with 7044 additions and 0 deletions
View File
+24
View File
@@ -0,0 +1,24 @@
"""Shared test fixtures for cartsnitch-common tests."""
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from cartsnitch_common.models.base import Base
@pytest.fixture
def engine():
    """Provide an in-memory SQLite engine with every model table created.

    The engine is disposed after the requesting test has finished.
    """
    memory_engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(memory_engine)
    yield memory_engine
    memory_engine.dispose()
@pytest.fixture
def session(engine):
    """Yield an ORM session bound to the in-memory ``engine`` fixture.

    The session is created inside a ``with`` block, so it is closed when the
    fixture is torn down.
    """
    make_session = sessionmaker(bind=engine)
    with make_session() as db_session:
        yield db_session
+376
View File
@@ -0,0 +1,376 @@
"""Tests for SQLAlchemy ORM models."""
import uuid
from datetime import UTC, date, datetime
from decimal import Decimal
import pytest
from sqlalchemy import inspect
from cartsnitch_common.constants import (
AccountStatus,
DiscountType,
PriceSource,
ProductCategory,
SizeUnit,
StoreSlug,
)
from cartsnitch_common.models import (
Coupon,
NormalizedProduct,
PriceHistory,
Purchase,
PurchaseItem,
ShrinkflationEvent,
Store,
StoreLocation,
User,
UserStoreAccount,
)
class TestTableCreation:
    """Check that creating the metadata produces the expected tables."""

    def test_all_tables_exist(self, engine):
        """Every expected table name is reported by the inspector."""
        required_tables = {
            "stores",
            "store_locations",
            "users",
            "user_store_accounts",
            "purchases",
            "purchase_items",
            "normalized_products",
            "price_history",
            "coupons",
            "shrinkflation_events",
        }
        actual_tables = set(inspect(engine).get_table_names())
        assert required_tables.issubset(actual_tables)

    def test_ten_tables_total(self, engine):
        """The schema contains exactly ten tables."""
        all_tables = inspect(engine).get_table_names()
        assert len(all_tables) == 10
class TestUUIDPrimaryKeys:
    """Primary keys on persisted rows are ``uuid.UUID`` instances."""

    def test_store_uuid_pk(self, session):
        """A committed Store still exposes a UUID primary key."""
        meijer = Store(
            id=uuid.uuid4(),
            name="Meijer",
            slug=StoreSlug.MEIJER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(meijer)
        session.commit()

        assert isinstance(meijer.id, uuid.UUID)

    def test_user_uuid_pk(self, session):
        """A committed User still exposes a UUID primary key."""
        account_owner = User(
            id=uuid.uuid4(),
            email="test@example.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(account_owner)
        session.commit()

        assert isinstance(account_owner.id, uuid.UUID)
class TestStoreModel:
    """Behaviour of the Store model: slug enum round-trip and slug uniqueness."""

    def test_store_slug_enum(self, session):
        """The slug assigned as a StoreSlug member compares equal after commit."""
        store = Store(
            id=uuid.uuid4(),
            name="Kroger",
            slug=StoreSlug.KROGER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(store)
        session.commit()
        assert store.slug == StoreSlug.KROGER

    def test_store_unique_slug(self, session):
        """Committing a second Store with an already-used slug is rejected."""
        # Imported locally so this test carries its own dependency.
        from sqlalchemy.exc import IntegrityError

        s1 = Store(
            id=uuid.uuid4(),
            name="Target",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        s2 = Store(
            id=uuid.uuid4(),
            name="Target Duplicate",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(s1)
        session.commit()

        session.add(s2)
        # Expect the constraint violation specifically; a blanket
        # ``Exception`` would also pass on unrelated errors (typos, bad setup).
        with pytest.raises(IntegrityError):
            session.commit()
        # Leave the session usable for fixture teardown after the failed flush.
        session.rollback()
class TestStoreLocationModel:
    """Persistence of StoreLocation rows attached to a Store."""

    def test_store_location_fields(self, session):
        """Address and coordinate fields survive a commit."""
        parent_store = Store(
            id=uuid.uuid4(),
            name="Meijer",
            slug=StoreSlug.MEIJER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(parent_store)
        # Flush so the store row exists before the location references it.
        session.flush()

        location = StoreLocation(
            id=uuid.uuid4(),
            store_id=parent_store.id,
            address="123 Main St",
            city="Ann Arbor",
            state="MI",
            zip="48104",
            lat=42.2808,
            lng=-83.7430,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(location)
        session.commit()

        assert location.city == "Ann Arbor"
        assert location.lat == pytest.approx(42.2808)
class TestUserStoreAccountModel:
    """Behaviour of UserStoreAccount: status enum and per-user/store uniqueness."""

    def test_account_status_enum(self, session):
        """The status assigned as an AccountStatus member compares equal after commit."""
        user = User(
            id=uuid.uuid4(),
            email="test@test.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        store = Store(
            id=uuid.uuid4(),
            name="Kroger",
            slug=StoreSlug.KROGER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([user, store])
        # Flush so both parent rows exist before the account references them.
        session.flush()

        acct = UserStoreAccount(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            status=AccountStatus.ACTIVE,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(acct)
        session.commit()
        assert acct.status == AccountStatus.ACTIVE

    def test_unique_user_store_constraint(self, session):
        """One account per user per store."""
        # Imported locally so this test carries its own dependency.
        from sqlalchemy.exc import IntegrityError

        user = User(
            id=uuid.uuid4(),
            email="unique@test.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        store = Store(
            id=uuid.uuid4(),
            name="Target",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([user, store])
        session.flush()

        a1 = UserStoreAccount(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            status=AccountStatus.ACTIVE,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        a2 = UserStoreAccount(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            status=AccountStatus.EXPIRED,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(a1)
        session.commit()

        session.add(a2)
        # Expect the constraint violation specifically; a blanket
        # ``Exception`` would also pass on unrelated errors.
        with pytest.raises(IntegrityError):
            session.commit()
        # Leave the session usable for fixture teardown after the failed flush.
        session.rollback()
class TestPurchaseModel:
    """Persistence of a Purchase together with one PurchaseItem."""

    def test_purchase_with_items(self, session):
        """A purchase item linked to a purchase keeps its name and unit price."""
        buyer = User(
            id=uuid.uuid4(),
            email="buyer@test.com",
            hashed_password="hashed",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        meijer = Store(
            id=uuid.uuid4(),
            name="Meijer",
            slug=StoreSlug.MEIJER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([buyer, meijer])
        # Parent rows must be flushed before the purchase references them.
        session.flush()

        receipt = Purchase(
            id=uuid.uuid4(),
            user_id=buyer.id,
            store_id=meijer.id,
            receipt_id="RCP-001",
            purchase_date=date(2026, 3, 15),
            total=Decimal("42.50"),
            ingested_at=datetime.now(UTC),
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(receipt)
        session.flush()

        line_item = PurchaseItem(
            id=uuid.uuid4(),
            purchase_id=receipt.id,
            product_name_raw="Meijer Whole Milk 1 Gallon",
            upc="0041250000001",
            quantity=Decimal("1"),
            unit_price=Decimal("3.49"),
            extended_price=Decimal("3.49"),
        )
        session.add(line_item)
        session.commit()

        assert line_item.product_name_raw == "Meijer Whole Milk 1 Gallon"
        assert line_item.unit_price == Decimal("3.49")
class TestNormalizedProductModel:
    """Persistence of NormalizedProduct rows."""

    def test_product_with_upc_variants(self, session):
        """A product stored with two UPC variants keeps its category and size unit."""
        milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk, 1 Gallon",
            category=ProductCategory.DAIRY,
            brand="Store Brand",
            size="128",
            size_unit=SizeUnit.FL_OZ,
            upc_variants=["0041250000001", "0041250000002"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(milk)
        session.commit()

        assert milk.category == ProductCategory.DAIRY
        assert milk.size_unit == SizeUnit.FL_OZ
class TestPriceHistoryModel:
    """Persistence of PriceHistory rows."""

    def test_price_source_enum(self, session):
        """A price observation keeps its source enum and regular price."""
        kroger = Store(
            id=uuid.uuid4(),
            name="Kroger",
            slug=StoreSlug.KROGER,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        eggs = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Eggs, Large, 12ct",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add_all([kroger, eggs])
        # Flush so the observation can reference both parent rows.
        session.flush()

        observation = PriceHistory(
            id=uuid.uuid4(),
            normalized_product_id=eggs.id,
            store_id=kroger.id,
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("4.99"),
            sale_price=Decimal("3.99"),
            source=PriceSource.RECEIPT,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(observation)
        session.commit()

        assert observation.source == PriceSource.RECEIPT
        assert observation.regular_price == Decimal("4.99")
class TestCouponModel:
    """Persistence of Coupon rows."""

    def test_coupon_discount_types(self, session):
        """A fixed-amount coupon keeps its discount type and value."""
        target = Store(
            id=uuid.uuid4(),
            name="Target",
            slug=StoreSlug.TARGET,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(target)
        session.flush()

        egg_coupon = Coupon(
            id=uuid.uuid4(),
            store_id=target.id,
            title="$2 off eggs",
            discount_type=DiscountType.FIXED,
            discount_value=Decimal("2.00"),
            requires_clip=True,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(egg_coupon)
        session.commit()

        assert egg_coupon.discount_type == DiscountType.FIXED
        assert egg_coupon.discount_value == Decimal("2.00")
class TestShrinkflationEventModel:
    """Persistence of ShrinkflationEvent rows."""

    def test_shrinkflation_event(self, session):
        """A stored event keeps its confidence and old size unit."""
        cereal = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Cereal, Honey Oats",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(cereal)
        # Flush so the event can reference the product row.
        session.flush()

        shrink = ShrinkflationEvent(
            id=uuid.uuid4(),
            normalized_product_id=cereal.id,
            detected_date=date(2026, 3, 10),
            old_size="18",
            new_size="15.4",
            old_unit=SizeUnit.OZ,
            new_unit=SizeUnit.OZ,
            price_at_old_size=Decimal("4.99"),
            price_at_new_size=Decimal("4.99"),
            confidence=Decimal("0.95"),
            notes="Size reduced by 14.4%, price unchanged",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(shrink)
        session.commit()

        assert shrink.confidence == Decimal("0.95")
        assert shrink.old_unit == SizeUnit.OZ
+157
View File
@@ -0,0 +1,157 @@
"""Tests for product normalization module."""
import uuid
from datetime import UTC, datetime
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.normalization import (
MatchMethod,
clean_name,
extract_size_info,
jaccard_similarity,
match_by_name,
match_by_upc,
normalize_product,
)
class TestCleanName:
    """Unit tests for ``clean_name``."""

    def test_lowercase(self):
        """Output is lower-cased."""
        assert clean_name("Kroger WHOLE MILK") == "kroger whole milk"

    def test_removes_size_info(self):
        """Size tokens such as ``oz`` are stripped from the name."""
        assert "oz" not in clean_name("Milk 16 oz Whole")

    def test_removes_noise_words(self):
        """Filler words do not survive as standalone tokens."""
        cleaned = clean_name("The Original Brand Milk")
        assert "the" not in cleaned.split()
        assert "original" not in cleaned.split()
        assert "brand" not in cleaned.split()

    def test_collapses_whitespace(self):
        """Runs of spaces are collapsed; no double space remains."""
        # Single spaces between words are expected (see test_lowercase), so the
        # check must look for a *double* space and the input must contain runs.
        assert "  " not in clean_name("Milk   Whole    Gallon")

    def test_removes_punctuation(self):
        """Apostrophes and parentheses are removed."""
        cleaned = clean_name("Meijer's Best (Organic) Milk!")
        assert "'" not in cleaned
        assert "(" not in cleaned
class TestExtractSizeInfo:
    """Unit tests for ``extract_size_info``."""

    def test_extracts_oz(self):
        """Ounces are returned as a (size, "oz") pair."""
        assert extract_size_info("Cereal 18 oz box") == ("18", "oz")

    def test_extracts_fl_oz(self):
        """Fluid ounces are returned with the ``fl_oz`` unit."""
        assert extract_size_info("Juice 64 fl oz") == ("64", "fl_oz")

    def test_extracts_lb(self):
        """Decimal pound sizes are preserved as written."""
        assert extract_size_info("Ground Beef 1.5 lb") == ("1.5", "lb")

    def test_extracts_ct(self):
        """Counts are returned with the ``ct`` unit."""
        assert extract_size_info("Eggs Large 12 ct") == ("12", "ct")

    def test_no_size_returns_none(self):
        """A name without any size yields None."""
        size_info = extract_size_info("Bananas")
        assert size_info is None
class TestJaccardSimilarity:
    """Unit tests for ``jaccard_similarity``."""

    def test_identical_strings(self):
        """Identical inputs score 1.0."""
        same = "whole milk gallon"
        assert jaccard_similarity(same, "whole milk gallon") == 1.0

    def test_completely_different(self):
        """Inputs sharing no words score 0.0."""
        assert jaccard_similarity("apple juice", "ground beef") == 0.0

    def test_partial_overlap(self):
        """Two of the words are shared, so the score is strictly between bounds."""
        overlap = jaccard_similarity("kroger whole milk", "meijer whole milk")
        # "whole" and "milk" overlap
        assert 0.4 < overlap < 0.8

    def test_empty_strings(self):
        """An empty operand on either side scores 0.0."""
        assert jaccard_similarity("", "") == 0.0
        assert jaccard_similarity("milk", "") == 0.0
class TestMatchByUPC:
    """Tests for ``match_by_upc`` against the SQLite-backed session."""

    def test_match_found(self, session):
        """A stored UPC variant is matched with method UPC and confidence 1.0."""
        milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk, Gallon",
            upc_variants=["0041250000001", "0041250000002"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(milk)
        session.commit()

        # NOTE(review): the earlier comment here said SQLite lacks JSONB
        # containment and that this call "will raise" (working only on
        # PostgreSQL), yet the assertions below expect a successful match.
        # Confirm which of the two actually holds for match_by_upc.
        hit = match_by_upc(session, "0041250000001")

        assert hit is not None
        assert hit.method == MatchMethod.UPC
        assert hit.confidence == 1.0

    def test_no_match(self, session):
        """An unknown UPC yields None."""
        miss = match_by_upc(session, "9999999999999")
        assert miss is None
class TestMatchByName:
    """Tests for ``match_by_name``."""

    def test_exact_name_match(self, session):
        """A near-identical raw name matches with method NAME above 0.5."""
        milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk, Gallon",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(milk)
        session.commit()

        hit = match_by_name(session, "Whole Milk Gallon")

        assert hit is not None
        assert hit.method == MatchMethod.NAME
        assert hit.confidence > 0.5

    def test_fuzzy_match(self, session):
        """A differently branded name still matches at a 0.3 threshold."""
        kroger_milk = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Kroger Whole Milk, 1 Gallon",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(kroger_milk)
        session.commit()

        hit = match_by_name(session, "Meijer Whole Milk 1 Gallon", threshold=0.3)

        assert hit is not None
        assert hit.confidence > 0.3

    def test_no_match_below_threshold(self, session):
        """An unrelated name yields None at a 0.5 threshold."""
        beef = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Ground Beef 80/20",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(beef)
        session.commit()

        miss = match_by_name(session, "Apple Juice 64 oz", threshold=0.5)

        assert miss is None
class TestNormalizeProduct:
    """Tests for the ``normalize_product`` entry point."""

    def test_name_fallback(self, session):
        """Without a UPC, the product is matched by name."""
        eggs = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Large Eggs, 12 count",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(eggs)
        session.commit()

        hit = normalize_product(session, "Large Eggs 12 ct", upc=None)

        assert hit is not None
        assert hit.method == MatchMethod.NAME

    def test_no_match(self, session):
        """With nothing stored and no UPC, the result is None."""
        miss = normalize_product(session, "Nonexistent Product XYZ", upc=None)
        assert miss is None
+949
View File
@@ -0,0 +1,949 @@
"""End-to-end integration tests for the data pipeline.
Tests the full flow: scraper output → normalization → product matching → DB storage
→ price tracking → shrinkflation detection → event publishing.
Uses real test fixtures with an in-memory SQLite database, not mocks.
"""
import uuid
from datetime import date
from decimal import Decimal
from unittest.mock import MagicMock
import pytest
from sqlalchemy import create_engine, select
from sqlalchemy.orm import Session, sessionmaker
from cartsnitch_common.constants import (
EventType,
SizeUnit,
StoreSlug,
)
from cartsnitch_common.events import publish_event
from cartsnitch_common.models import (
Base,
NormalizedProduct,
PriceHistory,
Purchase,
PurchaseItem,
ShrinkflationEvent,
Store,
User,
)
from cartsnitch_common.pipeline.matching import ProductMatcher
from cartsnitch_common.pipeline.price_tracking import (
PriceDelta,
get_price_trend,
record_price_from_item,
)
from cartsnitch_common.pipeline.receipt import normalize_receipt, parse_meijer_item
from cartsnitch_common.pipeline.shrinkflation import detect_shrinkflation
from cartsnitch_common.schemas.events import EventEnvelope
from cartsnitch_common.schemas.purchase import PurchaseCreate
# ---------------------------------------------------------------------------
# Fixtures: realistic scraper output from Meijer
# ---------------------------------------------------------------------------
# First-visit Meijer receipt payload passed to normalize_receipt() by the tests
# in this module. Five items; they mix key spellings (description/upcCode/
# quantity/unitPrice/extendedPrice vs. name/upc/qty/price/totalPrice, and
# category vs. department). All monetary values are strings.
MEIJER_RECEIPT_FIXTURE = {
"receiptId": "MJ-2026-03-15-00042",
"date": "2026-03-15",
"total": "47.82",
"subtotal": "44.50",
"taxAmount": "3.32",
"totalSavings": "6.20",
"items": [
# Milk: description carries leading/trailing spaces; UPC has leading zeros.
{
"description": " Meijer Whole Milk 1 Gallon ",
"upcCode": "00041250010001",
"quantity": 1,
"unitPrice": "3.29",
"extendedPrice": "3.29",
"regularPrice": "3.49",
"salePrice": "3.29",
"category": "Dairy",
},
# Pasta: uses the alternate key names and carries a coupon discount.
{
"name": "BARILLA SPAGHETTI 16 OZ",
"upc": "076808280753",
"qty": 2,
"price": "1.69",
"totalPrice": "3.38",
"regularPrice": "1.89",
"couponDiscount": "0.40",
"department": "Pantry",
},
# Ground beef: carries a loyalty discount.
{
"description": "Meijer Lean Ground Beef 1 lb",
"upcCode": "00041250022004",
"quantity": 1,
"unitPrice": "5.99",
"extendedPrice": "5.99",
"regularPrice": "6.49",
"loyaltyDiscount": "0.50",
"category": "Meat",
},
# Cheerios at 12 oz; the second-visit fixture reuses this UPC at 10.8 oz.
{
"description": "Cheerios Original 12 oz",
"upcCode": "016000275645",
"quantity": 1,
"unitPrice": "4.49",
"extendedPrice": "4.49",
"regularPrice": "4.49",
"category": "Snacks",
},
# Bananas: no UPC key and no regularPrice key.
{
"description": "Fresh Bananas",
"quantity": 1,
"unitPrice": "0.69",
"extendedPrice": "0.69",
"category": "Produce",
},
],
}
# Second-visit Meijer receipt dated three days after the first fixture.
# Its three items reuse UPCs from MEIJER_RECEIPT_FIXTURE (milk, pasta, Cheerios).
# Unlike the first fixture it has no subtotal, taxAmount or totalSavings keys.
MEIJER_RECEIPT_SECOND_VISIT = {
"receiptId": "MJ-2026-03-18-00099",
"date": "2026-03-18",
"total": "12.47",
"items": [
# Milk: regularPrice unchanged at 3.49; no salePrice this time.
{
"description": "Meijer Whole Milk 1 Gallon",
"upcCode": "00041250010001",
"quantity": 1,
"unitPrice": "3.49",
"extendedPrice": "3.49",
"regularPrice": "3.49",
"category": "Dairy",
},
# Pasta: regularPrice is 1.99 here versus 1.89 on the first receipt.
{
"description": "BARILLA SPAGHETTI 16 OZ",
"upc": "076808280753",
"qty": 1,
"price": "1.99",
"totalPrice": "1.99",
"regularPrice": "1.99",
"department": "Pantry",
},
# Cheerios: same UPC and price, but the description now reads 10.8 oz (was 12 oz).
{
"description": "Cheerios Original 10.8 oz",
"upcCode": "016000275645",
"quantity": 1,
"unitPrice": "4.49",
"extendedPrice": "4.49",
"regularPrice": "4.49",
"category": "Snacks",
},
],
}
@pytest.fixture
def e2e_engine():
    """Provide an in-memory SQLite engine with all tables created for E2E tests.

    The engine is disposed after the requesting test has finished.
    """
    memory_engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(memory_engine)
    yield memory_engine
    memory_engine.dispose()
@pytest.fixture
def e2e_session(e2e_engine):
    """Yield an ORM session bound to the in-memory ``e2e_engine``.

    This fixture adds no rows itself; it only opens the session and closes it
    when the ``with`` block exits at teardown.
    """
    make_session = sessionmaker(bind=e2e_engine)
    with make_session() as db_session:
        yield db_session
@pytest.fixture
def store(e2e_session: Session) -> Store:
    """Add a Meijer store to the E2E session, flush it, and return it."""
    meijer = Store(id=uuid.uuid4(), name="Meijer", slug=StoreSlug.MEIJER)
    e2e_session.add(meijer)
    # Flush (not commit) so the row is visible within the test's session.
    e2e_session.flush()
    return meijer
@pytest.fixture
def user(e2e_session: Session) -> User:
    """Add a test user to the E2E session, flush it, and return it."""
    tester = User(
        id=uuid.uuid4(),
        email="tester@cartsnitch.com",
        hashed_password="hashed_test_password",
        display_name="Test User",
    )
    e2e_session.add(tester)
    # Flush (not commit) so the row is visible within the test's session.
    e2e_session.flush()
    return tester
@pytest.fixture
def redis_mock():
    """Return a MagicMock Redis client that records every ``publish`` call.

    Each call appends ``(channel, message)`` to the list exposed as
    ``client._published`` and returns 1.
    """
    captured: list[tuple[str, str]] = []

    def _record(channel: str, message: str) -> int:
        captured.append((channel, message))
        return 1

    fake_client = MagicMock()
    fake_client.publish = MagicMock(side_effect=_record)
    fake_client._published = captured
    return fake_client
# ===========================================================================
# Test class: Full pipeline E2E — scraper → normalization → matching → storage
# ===========================================================================
class TestFullPipelineE2E:
    """Scraper output → normalize_receipt → ProductMatcher → DB storage."""

    def test_normalize_meijer_receipt(self, user: User, store: Store):
        """Raw Meijer receipt normalizes into a valid PurchaseCreate."""
        purchase = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        assert isinstance(purchase, PurchaseCreate)
        assert purchase.receipt_id == "MJ-2026-03-15-00042"
        assert purchase.purchase_date == date(2026, 3, 15)
        assert purchase.total == Decimal("47.82")
        assert purchase.subtotal == Decimal("44.50")
        assert purchase.tax == Decimal("3.32")
        assert purchase.savings_total == Decimal("6.20")
        assert len(purchase.items) == 5
        assert purchase.raw_data == MEIJER_RECEIPT_FIXTURE

    def test_item_field_normalization(self, user: User, store: Store):
        """Items parse correctly regardless of field name variants."""
        purchase = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        # Item using 'description' / 'upcCode' fields
        milk = purchase.items[0]
        assert milk.product_name_raw == "Meijer Whole Milk 1 Gallon"
        assert milk.upc == "41250010001"  # leading zeros stripped
        assert milk.unit_price == Decimal("3.29")

        # Item using 'name' / 'upc' / 'qty' / 'price' / 'totalPrice' fields
        pasta = purchase.items[1]
        assert pasta.product_name_raw == "BARILLA SPAGHETTI 16 OZ"
        assert pasta.upc == "76808280753"
        assert pasta.quantity == Decimal("2")
        assert pasta.extended_price == Decimal("3.38")
        assert pasta.coupon_discount == Decimal("0.40")

    def test_upc_product_matching_and_storage(self, e2e_session: Session, user: User, store: Store):
        """Full flow: normalize → match → store in DB. UPC matching works E2E."""
        purchase_schema = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )

        # Run product matching
        matcher = ProductMatcher(e2e_session, auto_create=True)
        outcomes = matcher.match_items(purchase_schema.items)
        assert len(outcomes) == 5
        # First item has a UPC — auto_create makes a new product
        assert outcomes[0].created_new is True

        # Store the purchase in DB
        purchase_db = Purchase(
            id=uuid.uuid4(),
            user_id=user.id,
            store_id=store.id,
            receipt_id=purchase_schema.receipt_id,
            purchase_date=purchase_schema.purchase_date,
            total=purchase_schema.total,
            subtotal=purchase_schema.subtotal,
            tax=purchase_schema.tax,
            savings_total=purchase_schema.savings_total,
            raw_data=purchase_schema.raw_data,
        )
        e2e_session.add(purchase_db)
        e2e_session.flush()

        # Store items linked to the purchase. The index is not needed here,
        # so iterate the items directly instead of enumerating them.
        for item_schema in purchase_schema.items:
            item_db = PurchaseItem(
                id=uuid.uuid4(),
                purchase_id=purchase_db.id,
                product_name_raw=item_schema.product_name_raw,
                upc=item_schema.upc,
                quantity=item_schema.quantity,
                unit_price=item_schema.unit_price,
                extended_price=item_schema.extended_price,
                regular_price=item_schema.regular_price,
                sale_price=item_schema.sale_price,
                coupon_discount=item_schema.coupon_discount,
                loyalty_discount=item_schema.loyalty_discount,
                category_raw=item_schema.category_raw,
            )
            e2e_session.add(item_db)
        e2e_session.flush()

        # Verify data persisted correctly
        stored_purchase = e2e_session.execute(
            select(Purchase).where(Purchase.receipt_id == "MJ-2026-03-15-00042")
        ).scalar_one()
        assert stored_purchase.total == Decimal("47.82")
        assert stored_purchase.user_id == user.id
        assert stored_purchase.store_id == store.id

        stored_items = (
            e2e_session.execute(
                select(PurchaseItem).where(PurchaseItem.purchase_id == stored_purchase.id)
            )
            .scalars()
            .all()
        )
        assert len(stored_items) == 5

        # Verify products were created in normalized_products table
        products = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        assert len(products) == 5  # all 5 items auto-created products

    def test_second_visit_reuses_existing_products(
        self, e2e_session: Session, user: User, store: Store
    ):
        """On second receipt, products matched by UPC reuse existing records."""
        # Ingest first receipt
        first = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        matcher.match_items(first.items)
        products_after_first = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        first_count = len(products_after_first)

        # Ingest second receipt — overlapping UPCs
        second = normalize_receipt(
            MEIJER_RECEIPT_SECOND_VISIT,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        second_outcomes = matcher.match_items(second.items)

        # Milk, pasta, cheerios should match existing by UPC
        assert second_outcomes[0].created_new is False  # milk — UPC match
        assert second_outcomes[1].created_new is False  # pasta — UPC match
        assert second_outcomes[2].created_new is False  # cheerios — UPC match

        products_after_second = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        assert len(products_after_second) == first_count  # no new products created
# ===========================================================================
# Test class: Price tracking and shrinkflation detection E2E
# ===========================================================================
class TestPriceTrackingE2E:
    """Price recording from stored items and price delta detection."""

    @staticmethod
    def _resolve_product(session: Session, outcome, item_schema) -> NormalizedProduct | None:
        """Return the product for one match outcome, or None if none is found.

        Uses ``outcome.match.product`` when the outcome carries a match with a
        product. Otherwise (e.g. the product was auto-created) falls back to
        the first NormalizedProduct whose canonical name equals the item's raw
        product name.
        """
        product = outcome.match.product if outcome.match else None
        if product is not None:
            return product
        candidates = session.execute(select(NormalizedProduct)).scalars().all()
        return next(
            (c for c in candidates if c.canonical_name == item_schema.product_name_raw),
            None,
        )

    def test_price_recorded_from_ingested_receipt(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Ingest receipt → match products → record prices → verify price history."""
        purchase_schema = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        outcomes = matcher.match_items(purchase_schema.items)

        # Record prices for each matched item. strict=True fails loudly if the
        # matcher ever returns a different number of outcomes than items.
        price_entries = []
        for item_schema, outcome in zip(purchase_schema.items, outcomes, strict=True):
            product = self._resolve_product(e2e_session, outcome, item_schema)
            if product:
                entry, delta = record_price_from_item(
                    e2e_session,
                    product_id=product.id,
                    store_id=store.id,
                    observed_date=purchase_schema.purchase_date,
                    regular_price=item_schema.regular_price or item_schema.unit_price,
                    sale_price=item_schema.sale_price,
                )
                price_entries.append((entry, delta))

        # First ingestion — no deltas expected
        assert all(delta is None for _, delta in price_entries)

        # Verify price history stored
        all_prices = e2e_session.execute(select(PriceHistory)).scalars().all()
        assert len(all_prices) >= 4  # at least the items with regular_price

    def test_price_increase_detected_on_second_receipt(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Second receipt with higher price triggers a PriceDelta."""
        # Ingest first receipt
        first = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        first_outcomes = matcher.match_items(first.items)

        # Record first prices
        for item_schema, outcome in zip(first.items, first_outcomes, strict=True):
            product = self._resolve_product(e2e_session, outcome, item_schema)
            if product:
                record_price_from_item(
                    e2e_session,
                    product_id=product.id,
                    store_id=store.id,
                    observed_date=first.purchase_date,
                    regular_price=item_schema.regular_price or item_schema.unit_price,
                    sale_price=item_schema.sale_price,
                )

        # Ingest second receipt — pasta price went up ($1.89 → $1.99)
        second = normalize_receipt(
            MEIJER_RECEIPT_SECOND_VISIT,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        second_outcomes = matcher.match_items(second.items)

        # Record second prices and capture deltas
        deltas: list[PriceDelta] = []
        for item_schema, outcome in zip(second.items, second_outcomes, strict=True):
            product = self._resolve_product(e2e_session, outcome, item_schema)
            if product:
                _, delta = record_price_from_item(
                    e2e_session,
                    product_id=product.id,
                    store_id=store.id,
                    observed_date=second.purchase_date,
                    regular_price=item_schema.regular_price or item_schema.unit_price,
                    sale_price=item_schema.sale_price,
                )
                if delta:
                    deltas.append(delta)

        # Milk went from $3.49 → $3.49 (no change); pasta from $1.89 → $1.99 (increase)
        price_increases = [d for d in deltas if d.is_increase]
        assert len(price_increases) >= 1

        pasta_delta = next(
            (d for d in price_increases if d.old_price == Decimal("1.89")),
            None,
        )
        assert pasta_delta is not None
        assert pasta_delta.new_price == Decimal("1.99")
        assert pasta_delta.change_amount == Decimal("0.10")
        assert pasta_delta.is_increase is True

    def test_price_trend_across_visits(self, e2e_session: Session, user: User, store: Store):
        """get_price_trend returns ordered history after multiple ingestions."""
        # Create a product manually
        product = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Test Product",
            upc_variants=["1234567890"],
        )
        e2e_session.add(product)
        e2e_session.flush()

        # Record 3 prices on different dates
        dates_prices = [
            (date(2026, 3, 10), Decimal("2.99")),
            (date(2026, 3, 13), Decimal("3.19")),
            (date(2026, 3, 16), Decimal("2.79")),
        ]
        for obs_date, price in dates_prices:
            record_price_from_item(
                e2e_session,
                product_id=product.id,
                store_id=store.id,
                observed_date=obs_date,
                regular_price=price,
            )

        trend = get_price_trend(e2e_session, product.id, store.id)
        assert len(trend) == 3
        # Newest first
        assert trend[0].regular_price == Decimal("2.79")
        assert trend[1].regular_price == Decimal("3.19")
        assert trend[2].regular_price == Decimal("2.99")
class TestShrinkflationE2E:
    """Shrinkflation detection integrated with product matching."""

    def test_shrinkflation_detected_from_receipt_data(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Cheerios went from 12 oz → 10.8 oz between receipts. Detect shrinkflation."""
        # Ingest first receipt — creates Cheerios product with size from name
        first = normalize_receipt(
            MEIJER_RECEIPT_FIXTURE,
            user_id=str(user.id),
            store_id=str(store.id),
        )
        matcher = ProductMatcher(e2e_session, auto_create=True)
        # Called for its side effect of creating products; the outcomes
        # themselves are not needed to locate the Cheerios row.
        matcher.match_items(first.items)

        # Find the Cheerios product by name among all stored products. The
        # previous loop over outcomes always ended in this same table scan.
        products = e2e_session.execute(select(NormalizedProduct)).scalars().all()
        cheerios_product = next(
            (p for p in products if "cheerios" in p.canonical_name.lower()),
            None,
        )
        assert cheerios_product is not None

        # The auto-created product should have extracted "12" and "oz" from name
        assert cheerios_product.size == "12"
        assert cheerios_product.size_unit == SizeUnit.OZ

        # Now detect shrinkflation: 12 oz → 10.8 oz
        event = detect_shrinkflation(
            e2e_session,
            product=cheerios_product,
            new_size="10.8",
            new_unit=SizeUnit.OZ,
            new_price=Decimal("4.49"),
            detected_date=date(2026, 3, 18),
        )
        assert event is not None
        assert isinstance(event, ShrinkflationEvent)
        assert event.old_size == "12"
        assert event.new_size == "10.8"
        assert event.old_unit == SizeUnit.OZ
        assert event.new_unit == SizeUnit.OZ
        assert event.confidence >= Decimal("0.85")  # 10% decrease → 0.95

        # Verify stored in DB
        stored = e2e_session.execute(
            select(ShrinkflationEvent).where(
                ShrinkflationEvent.normalized_product_id == cheerios_product.id
            )
        ).scalar_one()
        assert stored.id == event.id

    def test_shrinkflation_dedup_on_repeat_detection(
        self, e2e_session: Session, user: User, store: Store
    ):
        """Same shrinkflation detected twice returns the existing event, not a duplicate."""
        product = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Brand X Cereal 15 oz",
            size="15",
            size_unit=SizeUnit.OZ,
            upc_variants=["999888777"],
        )
        e2e_session.add(product)
        e2e_session.flush()

        first = detect_shrinkflation(e2e_session, product, new_size="13.5", new_unit=SizeUnit.OZ)
        second = detect_shrinkflation(e2e_session, product, new_size="13.5", new_unit=SizeUnit.OZ)
        assert first is not None
        assert second is not None
        assert first.id == second.id  # same event, not duplicated

        count = len(
            e2e_session.execute(
                select(ShrinkflationEvent).where(
                    ShrinkflationEvent.normalized_product_id == product.id
                )
            )
            .scalars()
            .all()
        )
        assert count == 1
# ===========================================================================
# Test class: Event bus pub/sub for pipeline stage transitions
# ===========================================================================
class TestEventBusE2E:
"""Redis event publishing at each pipeline stage."""
def test_receipt_ingested_event(self, redis_mock, user: User, store: Store):
    """A RECEIPTS_INGESTED publish produces one message that parses as an EventEnvelope."""
    normalized = normalize_receipt(
        MEIJER_RECEIPT_FIXTURE,
        user_id=str(user.id),
        store_id=str(store.id),
    )
    event_payload = {
        "receipt_id": normalized.receipt_id,
        "user_id": str(user.id),
        "store_slug": StoreSlug.MEIJER,
        "item_count": len(normalized.items),
        "total": str(normalized.total),
    }

    receiver_count = publish_event(
        redis_mock,
        EventType.RECEIPTS_INGESTED,
        service="receiptwitness",
        payload=event_payload,
    )

    assert receiver_count == 1
    assert len(redis_mock._published) == 1
    published_channel, published_body = redis_mock._published[0]
    assert published_channel == EventType.RECEIPTS_INGESTED.value

    # Round-trip the raw message through the envelope schema.
    parsed = EventEnvelope.model_validate_json(published_body)
    assert parsed.event_type == EventType.RECEIPTS_INGESTED
    assert parsed.service == "receiptwitness"
    assert parsed.payload["receipt_id"] == "MJ-2026-03-15-00042"
    assert parsed.payload["item_count"] == 5
def test_price_updated_event(self, redis_mock, user: User, store: Store):
    """A PRICES_UPDATED publish lands on its channel with the payload intact."""
    event_payload = {
        "product_id": str(uuid.uuid4()),
        "store_slug": StoreSlug.MEIJER,
        "old_price": "1.89",
        "new_price": "1.99",
        "change_percent": "5.29",
    }

    receiver_count = publish_event(
        redis_mock,
        EventType.PRICES_UPDATED,
        service="cartsnitch-common",
        payload=event_payload,
    )

    assert receiver_count == 1
    published_channel, published_body = redis_mock._published[0]
    assert published_channel == EventType.PRICES_UPDATED.value

    parsed = EventEnvelope.model_validate_json(published_body)
    assert parsed.event_type == EventType.PRICES_UPDATED
    assert parsed.payload["old_price"] == "1.89"
def test_products_normalized_event(self, redis_mock, user: User, store: Store):
"""publish_event sends a valid envelope for PRODUCTS_NORMALIZED."""
product_id = str(uuid.uuid4())
subscribers = publish_event(
redis_mock,
EventType.PRODUCTS_NORMALIZED,
service="cartsnitch-common",
payload={
"product_id": product_id,
"canonical_name": "Barilla Spaghetti",
"match_method": "upc",
"confidence": "high",
},
)
assert subscribers == 1
channel, raw_msg = redis_mock._published[0]
assert channel == EventType.PRODUCTS_NORMALIZED.value
envelope = EventEnvelope.model_validate_json(raw_msg)
assert envelope.payload["confidence"] == "high"
def test_shrinkflation_alert_event(self, redis_mock, user: User, store: Store):
"""publish_event sends a valid envelope for ALERT_SHRINKFLATION."""
subscribers = publish_event(
redis_mock,
EventType.ALERT_SHRINKFLATION,
service="shrinkray",
payload={
"product_id": str(uuid.uuid4()),
"product_name": "Cheerios Original",
"old_size": "12 oz",
"new_size": "10.8 oz",
"confidence": "0.95",
},
)
assert subscribers == 1
channel, raw_msg = redis_mock._published[0]
assert channel == EventType.ALERT_SHRINKFLATION.value
def test_full_pipeline_emits_events_at_each_stage(
self, e2e_session: Session, redis_mock, user: User, store: Store
):
"""Full pipeline: ingest → match → record price → publish events at each stage."""
# Stage 1: Normalize receipt
purchase_schema = normalize_receipt(
MEIJER_RECEIPT_FIXTURE,
user_id=str(user.id),
store_id=str(store.id),
)
# Publish receipt ingested
publish_event(
redis_mock,
EventType.RECEIPTS_INGESTED,
service="receiptwitness",
payload={
"receipt_id": purchase_schema.receipt_id,
"item_count": len(purchase_schema.items),
},
)
# Stage 2: Match products
matcher = ProductMatcher(e2e_session, auto_create=True)
outcomes = matcher.match_items(purchase_schema.items)
for i, outcome in enumerate(outcomes):
product = outcome.match.product if outcome.match else None
if product is None:
# Auto-created — look up by name
products = e2e_session.execute(select(NormalizedProduct)).scalars().all()
for p in products:
if p.canonical_name == purchase_schema.items[i].product_name_raw:
product = p
break
if product is None:
continue
publish_event(
redis_mock,
EventType.PRODUCTS_NORMALIZED,
service="cartsnitch-common",
payload={
"product_id": str(product.id),
"match_method": outcome.match.method.value if outcome.match else "auto_create",
"confidence": outcome.confidence_level.value,
},
)
# Stage 3: Record prices
for i, item_schema in enumerate(purchase_schema.items):
product = outcomes[i].match.product if outcomes[i].match else None
if product is None:
products = e2e_session.execute(select(NormalizedProduct)).scalars().all()
for p in products:
if p.canonical_name == item_schema.product_name_raw:
product = p
break
if product:
_, delta = record_price_from_item(
e2e_session,
product_id=product.id,
store_id=store.id,
observed_date=purchase_schema.purchase_date,
regular_price=item_schema.regular_price or item_schema.unit_price,
)
if delta and delta.is_increase:
publish_event(
redis_mock,
EventType.ALERT_PRICE_INCREASE,
service="stickershock",
payload={
"product_id": str(product.id),
"old_price": str(delta.old_price),
"new_price": str(delta.new_price),
},
)
# Verify events published at each stage
channels = [ch for ch, _ in redis_mock._published]
assert EventType.RECEIPTS_INGESTED.value in channels
assert EventType.PRODUCTS_NORMALIZED.value in channels
# No price increases on first receipt, so no ALERT_PRICE_INCREASE expected
# All messages are valid EventEnvelopes
for _, raw_msg in redis_mock._published:
envelope = EventEnvelope.model_validate_json(raw_msg)
assert envelope.timestamp is not None
assert envelope.service
# ===========================================================================
# Test class: Error handling for malformed scraper output
# ===========================================================================
class TestMalformedScraperOutput:
    """Error handling for bad, partial, or unexpected scraper data."""

    def test_missing_item_name_produces_empty_string(self):
        """Item with no description/name field normalizes with empty product_name_raw."""
        item = parse_meijer_item({"unitPrice": "2.99"})
        assert item.product_name_raw == ""
        assert item.unit_price == Decimal("2.99")

    def test_missing_price_defaults_to_zero(self):
        """Item with no price fields defaults to zero."""
        item = parse_meijer_item({"description": "Mystery Product"})
        assert item.unit_price == Decimal("0")
        assert item.extended_price == Decimal("0")

    def test_non_numeric_price_defaults_to_zero(self):
        """Non-numeric price strings safely default to zero."""
        item = parse_meijer_item(
            {
                "description": "Bad Price Item",
                "unitPrice": "not_a_number",
                "extendedPrice": "$$$.xx",
            }
        )
        assert item.unit_price == Decimal("0")
        assert item.extended_price == Decimal("0")

    def test_empty_receipt_produces_empty_items(self, user: User, store: Store):
        """Receipt with no items normalizes cleanly."""
        raw = {"receiptId": "EMPTY-001", "date": "2026-03-15", "total": "0.00"}
        purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id))
        assert purchase.receipt_id == "EMPTY-001"
        assert purchase.total == Decimal("0.00")
        assert len(purchase.items) == 0

    def test_receipt_missing_date_defaults_to_today(self, user: User, store: Store):
        """Receipt with no date field defaults to today."""
        raw = {"receiptId": "NO-DATE-001", "total": "5.00", "items": []}
        # Sample the date on both sides of the call so a run that straddles
        # midnight cannot fail spuriously.
        earliest = date.today()
        purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id))
        latest = date.today()
        assert earliest <= purchase.purchase_date <= latest

    def test_receipt_missing_id_generates_uuid(self, user: User, store: Store):
        """Receipt with no ID generates a UUID."""
        raw = {"date": "2026-03-15", "total": "10.00", "items": []}
        purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id))
        # Should be a valid UUID string (uuid.UUID raises ValueError otherwise)
        uuid.UUID(purchase.receipt_id)

    def test_item_with_garbage_upc_preserves_it(self):
        """UPC field with non-standard content is preserved as-is after strip."""
        item = parse_meijer_item(
            {
                "description": "Weird UPC Product",
                "upc": " ABC-NOT-A-UPC ",
                "unitPrice": "1.99",
            }
        )
        # lstrip("0") on "ABC-NOT-A-UPC" leaves it intact
        assert item.upc == "ABC-NOT-A-UPC"

    def test_negative_prices_pass_through(self):
        """Negative prices (refunds) are preserved, not zeroed."""
        item = parse_meijer_item(
            {
                "description": "Refund Item",
                "unitPrice": "-5.99",
                "extendedPrice": "-5.99",
            }
        )
        assert item.unit_price == Decimal("-5.99")
        assert item.extended_price == Decimal("-5.99")

    def test_extended_price_auto_calculated(self):
        """When extendedPrice is missing, it's calculated from unitPrice * quantity."""
        item = parse_meijer_item(
            {
                "description": "No Extended",
                "unitPrice": "2.50",
                "quantity": "3",
            }
        )
        assert item.extended_price == Decimal("7.50")

    def test_matching_with_malformed_items(self, e2e_session: Session):
        """ProductMatcher handles items with missing/empty names gracefully."""
        matcher = ProductMatcher(e2e_session, auto_create=True)
        bad_items = [
            parse_meijer_item({"description": "", "unitPrice": "1.00"}),
            parse_meijer_item({"unitPrice": "2.00"}),
        ]
        outcomes = matcher.match_items(bad_items)
        assert len(outcomes) == 2
        # Both should auto-create (no match possible for empty names)
        assert all(o.created_new for o in outcomes)

    def test_completely_empty_receipt(self, user: User, store: Store):
        """Totally empty dict produces a valid PurchaseCreate with defaults."""
        # Bracket the call with date samples to stay stable across midnight.
        earliest = date.today()
        purchase = normalize_receipt({}, user_id=str(user.id), store_id=str(store.id))
        latest = date.today()
        assert purchase.total == Decimal("0")
        assert len(purchase.items) == 0
        assert earliest <= purchase.purchase_date <= latest

    def test_mixed_valid_and_malformed_items(self, user: User, store: Store):
        """Receipt with a mix of good and bad items processes all of them."""
        raw = {
            "receiptId": "MIX-001",
            "date": "2026-03-15",
            "total": "10.00",
            "items": [
                {
                    "description": "Good Product 8 oz",
                    "upc": "1234567890",
                    "unitPrice": "3.99",
                    "extendedPrice": "3.99",
                },
                {
                    "unitPrice": "not_a_price",
                },
                {
                    "description": " *** Special Chars !!! ",
                    "unitPrice": "2.50",
                },
            ],
        }
        purchase = normalize_receipt(raw, user_id=str(user.id), store_id=str(store.id))
        assert len(purchase.items) == 3
        # Good item
        assert purchase.items[0].product_name_raw == "Good Product 8 oz"
        assert purchase.items[0].upc == "1234567890"
        # Bad price item
        assert purchase.items[1].unit_price == Decimal("0")
        # Special chars stripped
        assert purchase.items[2].product_name_raw == "Special Chars"
+160
View File
@@ -0,0 +1,160 @@
"""Tests for product matching & dedup pipeline."""
import uuid
from datetime import UTC, datetime
from decimal import Decimal
from cartsnitch_common.constants import MatchConfidence
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.normalization import MatchMethod
from cartsnitch_common.pipeline.matching import (
ProductMatcher,
classify_confidence,
match_purchase_item,
)
from cartsnitch_common.schemas.purchase import PurchaseItemCreate
class TestClassifyConfidence:
    """classify_confidence buckets for UPC and name-based match scores."""

    def test_upc_always_high(self):
        """A UPC match is HIGH for both scores tried here."""
        for score in (1.0, 0.5):
            assert classify_confidence(score, MatchMethod.UPC) == MatchConfidence.HIGH

    def test_name_high(self):
        """Name scores of 0.9 and 0.8 classify as HIGH."""
        for score in (0.9, 0.8):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.HIGH

    def test_name_medium(self):
        """Name scores of 0.6 and 0.5 classify as MEDIUM."""
        for score in (0.6, 0.5):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.MEDIUM

    def test_name_low(self):
        """Name scores of 0.3 and 0.0 classify as LOW."""
        for score in (0.3, 0.0):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.LOW
class TestProductMatcher:
    """ProductMatcher lookups by UPC and by name, plus the auto-create fallback."""

    def _make_item(self, name: str, upc: str | None = None) -> PurchaseItemCreate:
        """Build a minimal purchase item priced at 3.99 to feed the matcher."""
        price = Decimal("3.99")
        return PurchaseItemCreate(
            product_name_raw=name,
            upc=upc,
            unit_price=price,
            extended_price=price,
        )

    def test_match_by_upc(self, session):
        """An item whose UPC is among a product's variants matches with HIGH confidence."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk Gallon",
            upc_variants=["041250000001"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        matcher = ProductMatcher(session)
        candidate = self._make_item("Kroger Milk", upc="041250000001")
        found, match, level = matcher.match_single(candidate)

        assert found is not None
        assert found.id == seeded.id
        assert match is not None
        assert match.method == MatchMethod.UPC
        assert level == MatchConfidence.HIGH

    def test_match_by_name(self, session):
        """With no UPC on the item, a similar name matches via MatchMethod.NAME."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Whole Milk Gallon",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        matcher = ProductMatcher(session, name_threshold=0.3)
        candidate = self._make_item("Whole Milk Gallon Size")
        found, match, level = matcher.match_single(candidate)

        assert found is not None
        assert match is not None
        assert match.method == MatchMethod.NAME

    def test_auto_create_when_no_match(self, session):
        """auto_create=True yields a new product with size parsed from the name."""
        matcher = ProductMatcher(session, auto_create=True)
        candidate = self._make_item("Unique Product XYZ 16 oz")
        found, match, level = matcher.match_single(candidate)

        assert found is not None
        assert match is None  # No match found, was created
        assert level == MatchConfidence.LOW
        assert found.canonical_name == "Unique Product XYZ 16 oz"
        assert found.size == "16"
        assert found.size_unit == "oz"

    def test_no_create_when_disabled(self, session):
        """auto_create=False returns neither a product nor a match result."""
        matcher = ProductMatcher(session, auto_create=False)
        candidate = self._make_item("Nonexistent Product")
        found, match, level = matcher.match_single(candidate)

        assert found is None
        assert match is None

    def test_batch_match(self, session):
        """match_items reports one existing match and one newly created product."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Large Eggs 12 Count",
            upc_variants=["012345"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        matcher = ProductMatcher(session)
        batch = [
            self._make_item("Large Eggs", upc="012345"),
            self._make_item("Brand New Never Seen Product"),
        ]
        outcomes = matcher.match_items(batch)
        assert len(outcomes) == 2

        known, unknown = outcomes[0], outcomes[1]
        assert known.match is not None
        assert known.confidence_level == MatchConfidence.HIGH
        assert known.created_new is False
        assert unknown.match is None
        assert unknown.created_new is True
class TestMatchPurchaseItem:
    """match_purchase_item convenience wrapper returning (product, confidence)."""

    def test_convenience_function(self, session):
        """An item carrying a known UPC resolves to a product with HIGH confidence."""
        seeded = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Ground Beef 80/20",
            upc_variants=["999888"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(seeded)
        session.commit()

        candidate = PurchaseItemCreate(
            product_name_raw="Ground Beef",
            upc="999888",
            unit_price=Decimal("5.99"),
            extended_price=Decimal("5.99"),
        )
        found, level = match_purchase_item(session, candidate)
        assert found is not None
        assert level == MatchConfidence.HIGH

    def test_auto_create_default(self, session):
        """With nothing to match, the default call still yields a product at LOW confidence."""
        candidate = PurchaseItemCreate(
            product_name_raw="Totally New Item",
            unit_price=Decimal("1.00"),
            extended_price=Decimal("1.00"),
        )
        found, level = match_purchase_item(session, candidate)
        assert found is not None
        assert level == MatchConfidence.LOW
+282
View File
@@ -0,0 +1,282 @@
"""Tests for price history tracking pipeline."""
import uuid
from datetime import UTC, date, datetime
from decimal import Decimal
from cartsnitch_common.constants import PriceSource, StoreSlug
from cartsnitch_common.models.price import PriceHistory
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.models.store import Store
from cartsnitch_common.pipeline.price_tracking import (
PriceDelta,
get_latest_price,
get_price_trend,
record_price_from_item,
)
def _make_store(session, slug=StoreSlug.MEIJER, name: str = "Meijer") -> Store:
    """Create a Store, add it to the session and flush it.

    Args:
        session: SQLAlchemy session the store is added to.
        slug: Store slug; defaults to ``StoreSlug.MEIJER``.
        name: Display name. Defaults to "Meijer" to match the default slug;
            pass a different name when using another slug so the row is not
            mislabeled.

    Returns:
        The flushed (not committed) ``Store`` instance.
    """
    store = Store(
        id=uuid.uuid4(),
        name=name,
        slug=slug,
        created_at=datetime.now(UTC),
        updated_at=datetime.now(UTC),
    )
    session.add(store)
    session.flush()
    return store
def _make_product(session, name="Test Product") -> NormalizedProduct:
    """Create a NormalizedProduct named ``name``, add it to the session and flush it."""
    fields = {
        "id": uuid.uuid4(),
        "canonical_name": name,
        "created_at": datetime.now(UTC),
        "updated_at": datetime.now(UTC),
    }
    row = NormalizedProduct(**fields)
    session.add(row)
    session.flush()
    return row
class TestGetLatestPrice:
    """get_latest_price for a product/store pair."""

    def test_no_history(self, session):
        """With no PriceHistory rows the lookup returns None."""
        product = _make_product(session)
        store = _make_store(session)
        assert get_latest_price(session, product.id, store.id) is None

    def test_returns_newest(self, session):
        """Of two observations, the one with the later observed_date is returned."""
        product = _make_product(session)
        store = _make_store(session)

        # Two entries: an older 3.99 and a newer 4.29
        observations = [
            (date(2026, 3, 1), Decimal("3.99")),
            (date(2026, 3, 10), Decimal("4.29")),
        ]
        rows = [
            PriceHistory(
                id=uuid.uuid4(),
                normalized_product_id=product.id,
                store_id=store.id,
                observed_date=seen_on,
                regular_price=price,
                source=PriceSource.RECEIPT,
            )
            for seen_on, price in observations
        ]
        session.add_all(rows)
        session.flush()

        latest = get_latest_price(session, product.id, store.id)
        assert latest is not None
        assert latest.regular_price == Decimal("4.29")
class TestRecordPriceFromItem:
    """record_price_from_item: stored entry fields and the returned price delta."""

    @staticmethod
    def _record(session, product, store, day: int, price: str, **extra):
        """Call record_price_from_item for 2026-03-``day`` at ``price``.

        Extra keyword arguments (sale_price, source, ...) are forwarded
        unchanged; returns the (entry, delta) pair from the pipeline call.
        """
        return record_price_from_item(
            session,
            product_id=product.id,
            store_id=store.id,
            observed_date=date(2026, 3, day),
            regular_price=Decimal(price),
            **extra,
        )

    def test_first_price_no_delta(self, session):
        """The first observation is stored with RECEIPT source and no delta."""
        product = _make_product(session)
        store = _make_store(session)
        entry, delta = self._record(session, product, store, 15, "3.99")
        assert entry is not None
        assert entry.regular_price == Decimal("3.99")
        assert entry.source == PriceSource.RECEIPT
        assert delta is None

    def test_price_increase_detected(self, session):
        """A higher second price yields an increase delta with the exact amount."""
        product = _make_product(session)
        store = _make_store(session)
        # First price
        self._record(session, product, store, 1, "3.99")
        # Price increase
        entry, delta = self._record(session, product, store, 15, "4.49")
        assert delta is not None
        assert delta.old_price == Decimal("3.99")
        assert delta.new_price == Decimal("4.49")
        assert delta.change_amount == Decimal("0.50")
        assert delta.is_increase is True
        assert delta.is_decrease is False
        assert delta.change_percent > Decimal("0")

    def test_price_decrease_detected(self, session):
        """A lower second price yields a decrease delta with a negative amount."""
        product = _make_product(session)
        store = _make_store(session)
        self._record(session, product, store, 1, "5.00")
        _, delta = self._record(session, product, store, 15, "4.00")
        assert delta is not None
        assert delta.is_decrease is True
        assert delta.change_amount == Decimal("-1.00")

    def test_same_price_no_delta(self, session):
        """Recording an unchanged price produces no delta."""
        product = _make_product(session)
        store = _make_store(session)
        self._record(session, product, store, 1, "3.99")
        _, delta = self._record(session, product, store, 15, "3.99")
        assert delta is None

    def test_sale_and_loyalty_prices_recorded(self, session):
        """Sale, loyalty and coupon prices are stored on the entry as given."""
        product = _make_product(session)
        store = _make_store(session)
        entry, _ = self._record(
            session,
            product,
            store,
            15,
            "5.99",
            sale_price=Decimal("4.99"),
            loyalty_price=Decimal("4.49"),
            coupon_price=Decimal("3.99"),
        )
        assert entry.sale_price == Decimal("4.99")
        assert entry.loyalty_price == Decimal("4.49")
        assert entry.coupon_price == Decimal("3.99")

    def test_custom_source(self, session):
        """An explicit source overrides the default on the stored entry."""
        product = _make_product(session)
        store = _make_store(session)
        entry, _ = self._record(
            session, product, store, 15, "3.99", source=PriceSource.CATALOG
        )
        assert entry.source == PriceSource.CATALOG
class TestGetPriceTrend:
    """get_price_trend ordering and limit handling."""

    def test_empty_trend(self, session):
        """A product/store pair with no history yields an empty list."""
        product = _make_product(session)
        store = _make_store(session)
        trend = get_price_trend(session, product.id, store.id)
        assert trend == []

    def test_returns_newest_first(self, session):
        """Entries come back ordered from the latest observed_date to the earliest."""
        product = _make_product(session)
        store = _make_store(session)
        for day in [1, 5, 10, 15]:
            session.add(
                PriceHistory(
                    id=uuid.uuid4(),
                    normalized_product_id=product.id,
                    store_id=store.id,
                    observed_date=date(2026, 3, day),
                    # Exact Decimal arithmetic (3.1, 3.5, 4, 4.5); avoids routing
                    # a monetary value through binary floating point.
                    regular_price=Decimal("3") + Decimal(day) / Decimal("10"),
                    source=PriceSource.RECEIPT,
                )
            )
        session.flush()

        trend = get_price_trend(session, product.id, store.id)
        assert len(trend) == 4
        assert trend[0].observed_date == date(2026, 3, 15)
        assert trend[-1].observed_date == date(2026, 3, 1)

    def test_respects_limit(self, session):
        """limit=3 caps the result at three entries out of ten stored."""
        product = _make_product(session)
        store = _make_store(session)
        for day in range(1, 11):
            session.add(
                PriceHistory(
                    id=uuid.uuid4(),
                    normalized_product_id=product.id,
                    store_id=store.id,
                    observed_date=date(2026, 3, day),
                    regular_price=Decimal("3.99"),
                    source=PriceSource.RECEIPT,
                )
            )
        session.flush()

        trend = get_price_trend(session, product.id, store.id, limit=3)
        assert len(trend) == 3
class TestPriceDelta:
    """is_increase / is_decrease flags on hand-built PriceDelta values."""

    @staticmethod
    def _delta(old: str, new: str, amount: str, percent: str) -> PriceDelta:
        """Build a PriceDelta between 2026-03-01 and 2026-03-15 with random ids."""
        return PriceDelta(
            product_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            old_price=Decimal(old),
            new_price=Decimal(new),
            change_amount=Decimal(amount),
            change_percent=Decimal(percent),
            old_date=date(2026, 3, 1),
            new_date=date(2026, 3, 15),
        )

    def test_delta_properties(self):
        """A positive change is flagged as an increase only."""
        rising = self._delta("3.99", "4.49", "0.50", "12.53")
        assert rising.is_increase is True
        assert rising.is_decrease is False

    def test_decrease_properties(self):
        """A negative change is flagged as a decrease only."""
        falling = self._delta("4.49", "3.99", "-0.50", "-11.14")
        assert falling.is_decrease is True
        assert falling.is_increase is False
+204
View File
@@ -0,0 +1,204 @@
"""Tests for receipt normalization pipeline."""
import uuid
from datetime import date
from decimal import Decimal
from cartsnitch_common.pipeline.receipt import (
_clean_product_name,
_safe_decimal,
normalize_receipt,
parse_meijer_item,
)
class TestCleanProductName:
    """_clean_product_name whitespace and punctuation trimming."""

    def test_strips_whitespace(self):
        """Leading and trailing spaces are removed."""
        assert _clean_product_name(" Milk ") == "Milk"

    def test_removes_leading_punctuation(self):
        """Dashes on both ends of the name are removed."""
        assert _clean_product_name("---Milk---") == "Milk"

    def test_collapses_internal_whitespace(self):
        """Runs of spaces between words collapse to a single space."""
        # The input must contain repeated spaces; with single spaces the
        # assertion would pass even if nothing were collapsed.
        # NOTE(review): assumes the helper collapses space runs, as this
        # test's name states — confirm against _clean_product_name.
        assert _clean_product_name("Whole   Milk    Gallon") == "Whole Milk Gallon"

    def test_empty_string(self):
        """An empty name stays empty."""
        assert _clean_product_name("") == ""
class TestSafeDecimal:
    """_safe_decimal conversions and its fallback default."""

    def test_string_input(self):
        """A numeric string converts to the equal Decimal."""
        parsed = _safe_decimal("3.99")
        assert parsed == Decimal("3.99")

    def test_float_input(self):
        """The float 3.99 converts to Decimal("3.99")."""
        parsed = _safe_decimal(3.99)
        assert parsed == Decimal("3.99")

    def test_int_input(self):
        """An int converts to the equal Decimal."""
        parsed = _safe_decimal(4)
        assert parsed == Decimal("4")

    def test_none_returns_default(self):
        """None falls back to zero when no default is supplied."""
        parsed = _safe_decimal(None)
        assert parsed == Decimal("0")

    def test_none_custom_default(self):
        """None falls back to the default passed as second argument."""
        fallback = Decimal("1")
        assert _safe_decimal(None, fallback) == Decimal("1")

    def test_invalid_returns_default(self):
        """A non-numeric string falls back to zero."""
        parsed = _safe_decimal("not-a-number")
        assert parsed == Decimal("0")

    def test_decimal_passthrough(self):
        """A Decimal input comes back equal to itself."""
        parsed = _safe_decimal(Decimal("5.50"))
        assert parsed == Decimal("5.50")
class TestParseMeijerItem:
    """parse_meijer_item field mapping, alternate keys and defaults."""

    def test_basic_item(self):
        """Primary field names map onto the item; UPC loses its leading zeros."""
        parsed = parse_meijer_item(
            {
                "description": "Kroger Whole Milk 1 Gallon",
                "upc": "0041250000001",
                "quantity": 1,
                "unitPrice": "3.99",
                "extendedPrice": "3.99",
                "category": "DAIRY",
            }
        )
        assert parsed.product_name_raw == "Kroger Whole Milk 1 Gallon"
        assert parsed.upc == "41250000001"  # leading zeros stripped
        assert parsed.quantity == Decimal("1")
        assert parsed.unit_price == Decimal("3.99")
        assert parsed.extended_price == Decimal("3.99")
        assert parsed.category_raw == "DAIRY"

    def test_alternate_field_names(self):
        """name/upcCode/qty/price/totalPrice/department are accepted as aliases."""
        parsed = parse_meijer_item(
            {
                "name": "Eggs Large 12 ct",
                "upcCode": "012345",
                "qty": 2,
                "price": "4.50",
                "totalPrice": "9.00",
                "department": "EGGS",
            }
        )
        assert parsed.product_name_raw == "Eggs Large 12 ct"
        assert parsed.upc == "12345"
        assert parsed.quantity == Decimal("2")
        assert parsed.unit_price == Decimal("4.50")
        assert parsed.extended_price == Decimal("9.00")
        assert parsed.category_raw == "EGGS"

    def test_calculates_extended_from_unit_price(self):
        """Without extendedPrice, 0.59 at quantity 3 gives 1.77."""
        payload = {
            "description": "Bananas",
            "unitPrice": "0.59",
            "quantity": 3,
        }
        assert parse_meijer_item(payload).extended_price == Decimal("1.77")

    def test_discounts_parsed(self):
        """Regular/sale prices and coupon/loyalty amounts are read from the payload."""
        parsed = parse_meijer_item(
            {
                "description": "Cereal",
                "unitPrice": "4.99",
                "extendedPrice": "4.99",
                "regularPrice": "5.99",
                "salePrice": "4.99",
                "couponAmount": "1.00",
                "loyaltyAmount": "0.50",
            }
        )
        assert parsed.regular_price == Decimal("5.99")
        assert parsed.sale_price == Decimal("4.99")
        assert parsed.coupon_discount == Decimal("1.00")
        assert parsed.loyalty_discount == Decimal("0.50")

    def test_alternate_discount_names(self):
        """couponDiscount/loyaltyDiscount are accepted as discount aliases."""
        parsed = parse_meijer_item(
            {
                "description": "Bread",
                "unitPrice": "2.99",
                "extendedPrice": "2.99",
                "couponDiscount": "0.75",
                "loyaltyDiscount": "0.25",
            }
        )
        assert parsed.coupon_discount == Decimal("0.75")
        assert parsed.loyalty_discount == Decimal("0.25")

    def test_missing_fields_default_gracefully(self):
        """A description-only payload gets quantity 1, zero price and None extras."""
        parsed = parse_meijer_item({"description": "Mystery Item"})
        assert parsed.product_name_raw == "Mystery Item"
        assert parsed.upc is None
        assert parsed.quantity == Decimal("1")
        assert parsed.unit_price == Decimal("0")
        assert parsed.regular_price is None
        assert parsed.category_raw is None

    def test_no_upc_returns_none(self):
        """A payload without any UPC key leaves upc as None."""
        payload = {"description": "Loose Bananas", "unitPrice": "1.00", "extendedPrice": "1.00"}
        assert parse_meijer_item(payload).upc is None
class TestNormalizeReceipt:
    """normalize_receipt header fields, alternate keys and generated defaults."""

    def test_full_receipt(self):
        """All primary receipt fields and both items are carried over; raw_data is kept."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {
            "receiptId": "REC-001",
            "date": "2026-03-15",
            "total": "25.47",
            "subtotal": "23.00",
            "tax": "2.47",
            "savings": "3.00",
            "items": [
                {"description": "Milk", "unitPrice": "3.99", "extendedPrice": "3.99"},
                {"description": "Bread", "unitPrice": "2.50", "extendedPrice": "2.50"},
            ],
        }
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id == "REC-001"
        assert purchase.purchase_date == date(2026, 3, 15)
        assert purchase.total == Decimal("25.47")
        assert purchase.subtotal == Decimal("23.00")
        assert purchase.tax == Decimal("2.47")
        assert purchase.savings_total == Decimal("3.00")
        assert len(purchase.items) == 2
        assert purchase.items[0].product_name_raw == "Milk"
        assert purchase.raw_data == raw

    def test_alternate_receipt_fields(self):
        """receipt_id/purchaseDate/totalAmount/taxAmount/totalSavings are accepted."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {
            "receipt_id": "REC-002",
            "purchaseDate": "2026-03-14",
            "totalAmount": "10.00",
            "taxAmount": "0.75",
            "totalSavings": "1.50",
            "items": [],
        }
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id == "REC-002"
        assert purchase.purchase_date == date(2026, 3, 14)
        assert purchase.total == Decimal("10.00")
        assert purchase.tax == Decimal("0.75")
        assert purchase.savings_total == Decimal("1.50")

    def test_missing_date_defaults_to_today(self):
        """A receipt without a date field is stamped with the current date."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {"total": "5.00", "items": []}
        # Sample the date before and after the call so the assertion stays
        # true even when the run crosses midnight.
        earliest = date.today()
        purchase = normalize_receipt(raw, user_id, store_id)
        latest = date.today()
        assert earliest <= purchase.purchase_date <= latest

    def test_generates_receipt_id_if_missing(self):
        """A receipt without an id gets a generated UUID string."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {"total": "5.00", "date": "2026-03-15", "items": []}
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id
        # Truthiness alone would accept any non-empty string; parsing raises
        # ValueError unless the value really is a UUID.
        uuid.UUID(purchase.receipt_id)

    def test_date_object_passthrough(self):
        """A date object in the payload is used as the purchase date unchanged."""
        user_id = str(uuid.uuid4())
        store_id = str(uuid.uuid4())
        raw = {"date": date(2026, 1, 1), "total": "5.00", "items": []}
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.purchase_date == date(2026, 1, 1)
+233
View File
@@ -0,0 +1,233 @@
"""Tests for shrinkflation detection pipeline."""
import uuid
from datetime import UTC, date, datetime
from decimal import Decimal
from cartsnitch_common.constants import SizeUnit
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.pipeline.shrinkflation import (
_to_comparable,
_units_comparable,
detect_shrinkflation,
)
class TestToComparable:
    """_to_comparable conversions for weight, volume and count units."""

    def test_oz_to_grams(self):
        """16 oz converts using the factor 28.3495."""
        grams = _to_comparable("16", SizeUnit.OZ)
        assert grams is not None
        assert grams == Decimal("16") * Decimal("28.3495")

    def test_lb_to_grams(self):
        """1 lb converts to 453.592."""
        grams = _to_comparable("1", SizeUnit.LB)
        assert grams == Decimal("453.592")

    def test_ml_to_ml(self):
        """Millilitres pass through unchanged."""
        millilitres = _to_comparable("500", SizeUnit.ML)
        assert millilitres == Decimal("500")

    def test_fl_oz_to_ml(self):
        """12 fl oz converts using the factor 29.5735."""
        millilitres = _to_comparable("12", SizeUnit.FL_OZ)
        assert millilitres is not None
        assert millilitres == Decimal("12") * Decimal("29.5735")

    def test_count_units(self):
        """Count-style units (CT, PK) keep their numeric value."""
        for size, unit, expected in (
            ("12", SizeUnit.CT, Decimal("12")),
            ("6", SizeUnit.PK, Decimal("6")),
        ):
            assert _to_comparable(size, unit) == expected

    def test_invalid_size(self):
        """A non-numeric size yields None."""
        assert _to_comparable("abc", SizeUnit.OZ) is None
class TestUnitsComparable:
    """_units_comparable across weight, volume and count units."""

    def test_weight_comparable(self):
        """Weight units compare with each other."""
        for left, right in ((SizeUnit.OZ, SizeUnit.LB), (SizeUnit.G, SizeUnit.KG)):
            assert _units_comparable(left, right) is True

    def test_volume_comparable(self):
        """Volume units compare with each other."""
        for left, right in ((SizeUnit.ML, SizeUnit.L), (SizeUnit.FL_OZ, SizeUnit.ML)):
            assert _units_comparable(left, right) is True

    def test_count_comparable(self):
        """CT and PK compare with each other."""
        assert _units_comparable(SizeUnit.CT, SizeUnit.PK) is True

    def test_not_comparable_across_systems(self):
        """Weight, volume and count units do not compare across groups."""
        mismatched = (
            (SizeUnit.OZ, SizeUnit.ML),
            (SizeUnit.CT, SizeUnit.OZ),
            (SizeUnit.LB, SizeUnit.L),
        )
        for left, right in mismatched:
            assert _units_comparable(left, right) is False
class TestDetectShrinkflation:
    """detect_shrinkflation over same-unit, cross-unit and non-comparable size changes."""

    def _make_product(self, session, size: str, unit: SizeUnit, name: str = "Test Product"):
        """Add and flush a NormalizedProduct carrying the given size and unit."""
        row = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name=name,
            size=size,
            size_unit=unit,
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(row)
        session.flush()
        return row

    def test_detects_oz_decrease(self, session):
        """16 oz → 14 oz produces an event recording both sizes and a 'decreased' note."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="14",
            new_unit=SizeUnit.OZ,
            detected_date=date(2026, 3, 15),
        )
        assert detected is not None
        assert detected.old_size == "16"
        assert detected.new_size == "14"
        assert "decreased" in detected.notes.lower()

    def test_no_detection_when_size_increases(self, session):
        """A larger new size yields no event."""
        product = self._make_product(session, "14", SizeUnit.OZ)
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="16",
            new_unit=SizeUnit.OZ,
        )
        assert detected is None

    def test_no_detection_same_size(self, session):
        """An unchanged size yields no event."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="16",
            new_unit=SizeUnit.OZ,
        )
        assert detected is None

    def test_no_detection_incompatible_units(self, session):
        """OZ against ML is not compared, so no event is produced."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="400",
            new_unit=SizeUnit.ML,
        )
        assert detected is None

    def test_no_detection_without_existing_size(self, session):
        """A product stored without a size has nothing to compare against."""
        unsized = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="No Size Product",
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(unsized)
        session.flush()

        detected = detect_shrinkflation(
            session,
            product=unsized,
            new_size="12",
            new_unit=SizeUnit.OZ,
        )
        assert detected is None

    def test_cross_unit_detection_same_system(self, session):
        """1 lb → 14 oz is detected as a decrease."""
        # 1 lb = 453.592g, 14 oz = 396.893g → size decreased
        product = self._make_product(session, "1", SizeUnit.LB)
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="14",
            new_unit=SizeUnit.OZ,
            detected_date=date(2026, 3, 15),
        )
        assert detected is not None

    def test_count_decrease(self, session):
        """12 ct → 10 ct produces an event recording both counts."""
        product = self._make_product(session, "12", SizeUnit.CT)
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="10",
            new_unit=SizeUnit.CT,
            detected_date=date(2026, 3, 15),
        )
        assert detected is not None
        assert detected.old_size == "12"
        assert detected.new_size == "10"

    def test_dedup_existing_event(self, session):
        """Repeating the same detection a day later returns the existing event."""
        product = self._make_product(session, "16", SizeUnit.OZ)
        # First detection, then the identical one dated a day later
        first, second = (
            detect_shrinkflation(
                session,
                product=product,
                new_size="14",
                new_unit=SizeUnit.OZ,
                detected_date=date(2026, 3, day),
            )
            for day in (15, 16)
        )
        assert first is not None
        assert second is not None
        assert first.id == second.id

    def test_confidence_scaling(self, session):
        """Confidence grows with the size of the decrease."""
        scenarios = (
            ("Product A", "97", Decimal("0.70")),  # Small decrease (< 5%)
            ("Product B", "93", Decimal("0.85")),  # Medium decrease (5-10%)
            ("Product C", "85", Decimal("0.95")),  # Large decrease (>= 10%)
        )
        for name, shrunk_size, expected in scenarios:
            product = self._make_product(session, "100", SizeUnit.G, name)
            detected = detect_shrinkflation(
                session,
                product=product,
                new_size=shrunk_size,
                new_unit=SizeUnit.G,
                detected_date=date(2026, 3, 15),
            )
            assert detected is not None
            assert detected.confidence == expected

    def test_min_size_decrease_threshold(self, session):
        """A 0.5% decrease is ignored when the minimum is set to 1%."""
        product = self._make_product(session, "100", SizeUnit.G)
        # 0.5% decrease — below the 1% threshold passed explicitly here
        detected = detect_shrinkflation(
            session,
            product=product,
            new_size="99.5",
            new_unit=SizeUnit.G,
            min_size_decrease_pct=Decimal("1"),
        )
        assert detected is None
+225
View File
@@ -0,0 +1,225 @@
"""Tests for Pydantic v2 schemas."""
import uuid
from datetime import UTC, date, datetime
from decimal import Decimal
import pytest
from pydantic import ValidationError
from cartsnitch_common.constants import (
AccountStatus,
DiscountType,
EventType,
PriceSource,
ProductCategory,
SizeUnit,
StoreSlug,
)
from cartsnitch_common.schemas import (
CouponCreate,
EventEnvelope,
NormalizedProductCreate,
PriceHistoryCreate,
PurchaseCreate,
PurchaseItemCreate,
ShrinkflationEventCreate,
StoreCreate,
StoreLocationCreate,
StoreRead,
UserCreate,
UserStoreAccountCreate,
)
class TestStoreSchemas:
    """StoreCreate / StoreRead validation."""

    def test_store_create_valid(self):
        """A known slug is accepted and kept on the schema."""
        created = StoreCreate(name="Meijer", slug=StoreSlug.MEIJER)
        assert created.slug == StoreSlug.MEIJER

    def test_store_create_invalid_slug(self):
        """The slug "walmart" is rejected with a ValidationError."""
        with pytest.raises(ValidationError):
            StoreCreate(name="Walmart", slug="walmart")

    def test_store_read_from_attributes(self):
        """StoreRead accepts a full set of fields passed as keyword arguments."""
        # NOTE(review): despite the name, this builds the schema from a dict
        # via keyword arguments rather than from an attribute-bearing object.
        now_created = datetime.now(UTC)
        now_updated = datetime.now(UTC)
        fields = {
            "id": uuid.uuid4(),
            "name": "Kroger",
            "slug": StoreSlug.KROGER,
            "logo_url": None,
            "website_url": None,
            "created_at": now_created,
            "updated_at": now_updated,
        }
        read = StoreRead(**fields)
        assert read.slug == StoreSlug.KROGER
class TestStoreLocationSchemas:
    """Checks for the ``StoreLocationCreate`` schema."""

    def test_location_create(self):
        """Address fields given at construction are readable afterwards."""
        address_fields = {
            "store_id": uuid.uuid4(),
            "address": "456 Oak Ave",
            "city": "Detroit",
            "state": "MI",
            "zip": "48201",
        }
        location = StoreLocationCreate(**address_fields)
        assert location.city == "Detroit"
class TestUserSchemas:
    """Checks for the ``UserCreate`` schema."""

    def test_user_create_valid(self):
        """A well-formed email address is accepted and stored unchanged."""
        address = "test@example.com"
        new_user = UserCreate(email=address, password="secret123")
        assert new_user.email == address

    def test_user_create_invalid_email(self):
        """The value ``"not-an-email"`` is expected to fail validation."""
        rejected_fields = {"email": "not-an-email", "password": "secret123"}
        with pytest.raises(ValidationError):
            UserCreate(**rejected_fields)
class TestUserStoreAccountSchemas:
    """Checks for the ``UserStoreAccountCreate`` schema's ``status`` field."""

    def test_account_create_with_status(self):
        """An explicitly supplied status is kept on the model."""
        wanted_status = AccountStatus.EXPIRED
        account = UserStoreAccountCreate(
            user_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            status=wanted_status,
        )
        assert account.status == wanted_status

    def test_account_create_default_status(self):
        """Omitting ``status`` yields ``AccountStatus.ACTIVE``."""
        owner_and_store = {"user_id": uuid.uuid4(), "store_id": uuid.uuid4()}
        account = UserStoreAccountCreate(**owner_and_store)
        assert account.status == AccountStatus.ACTIVE

    def test_account_create_invalid_status(self):
        """The status ``"invalid_status"`` is expected to be rejected."""
        rejected_fields = {
            "user_id": uuid.uuid4(),
            "store_id": uuid.uuid4(),
            "status": "invalid_status",
        }
        with pytest.raises(ValidationError):
            UserStoreAccountCreate(**rejected_fields)
class TestPurchaseSchemas:
    """Checks for ``PurchaseCreate`` with nested ``PurchaseItemCreate``."""

    def test_purchase_create_with_items(self):
        """A purchase keeps its line items; an item's quantity defaults to 1."""
        # No quantity is given, so the assertion below sees the default.
        milk = PurchaseItemCreate(
            product_name_raw="Milk",
            unit_price=Decimal("3.49"),
            extended_price=Decimal("3.49"),
        )
        purchase = PurchaseCreate(
            user_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            receipt_id="RCP-001",
            purchase_date=date(2026, 3, 15),
            total=Decimal("42.50"),
            items=[milk],
        )
        line_items = purchase.items
        assert len(line_items) == 1
        assert line_items[0].quantity == Decimal("1")
class TestNormalizedProductSchemas:
    """Checks for the ``NormalizedProductCreate`` schema."""

    def test_product_create_with_enums(self):
        """Enum-typed fields accept enum members and keep them."""
        product_fields = {
            "canonical_name": "Whole Milk, 1 Gallon",
            "category": ProductCategory.DAIRY,
            "size_unit": SizeUnit.FL_OZ,
            "upc_variants": ["0041250000001"],
        }
        product = NormalizedProductCreate(**product_fields)
        assert product.category == ProductCategory.DAIRY

    def test_product_create_invalid_category(self):
        """The category ``"invalid_category"`` is expected to be rejected."""
        rejected_fields = {
            "canonical_name": "Test",
            "category": "invalid_category",
        }
        with pytest.raises(ValidationError):
            NormalizedProductCreate(**rejected_fields)
class TestPriceHistorySchemas:
    """Checks for the ``PriceHistoryCreate`` schema's ``source`` field."""

    def test_price_create(self):
        """A ``PriceSource`` member is accepted and kept on the model."""
        wanted_source = PriceSource.RECEIPT
        observation = PriceHistoryCreate(
            normalized_product_id=uuid.uuid4(),
            store_id=uuid.uuid4(),
            observed_date=date(2026, 3, 15),
            regular_price=Decimal("4.99"),
            source=wanted_source,
        )
        assert observation.source == wanted_source

    def test_price_create_invalid_source(self):
        """The source ``"invalid_source"`` is expected to be rejected."""
        rejected_fields = {
            "normalized_product_id": uuid.uuid4(),
            "store_id": uuid.uuid4(),
            "observed_date": date(2026, 3, 15),
            "regular_price": Decimal("4.99"),
            "source": "invalid_source",
        }
        with pytest.raises(ValidationError):
            PriceHistoryCreate(**rejected_fields)
class TestCouponSchemas:
    """Checks for the ``CouponCreate`` schema's ``discount_type`` field."""

    def test_coupon_create(self):
        """A ``DiscountType`` member is accepted and kept on the model."""
        wanted_type = DiscountType.BOGO
        coupon = CouponCreate(
            store_id=uuid.uuid4(),
            title="BOGO Chips",
            discount_type=wanted_type,
        )
        assert coupon.discount_type == wanted_type

    def test_coupon_create_invalid_discount_type(self):
        """The discount type ``"free_stuff"`` is expected to be rejected."""
        rejected_fields = {
            "store_id": uuid.uuid4(),
            "title": "Test",
            "discount_type": "free_stuff",
        }
        with pytest.raises(ValidationError):
            CouponCreate(**rejected_fields)
class TestShrinkflationEventSchemas:
    """Checks for the ``ShrinkflationEventCreate`` schema's unit fields."""

    def test_shrinkflation_create(self):
        """``SizeUnit`` members are accepted for both old and new unit."""
        event_fields = {
            "normalized_product_id": uuid.uuid4(),
            "detected_date": date(2026, 3, 10),
            "old_size": "18",
            "new_size": "15.4",
            "old_unit": SizeUnit.OZ,
            "new_unit": SizeUnit.OZ,
            "confidence": Decimal("0.95"),
        }
        event = ShrinkflationEventCreate(**event_fields)
        assert event.old_unit == SizeUnit.OZ

    def test_shrinkflation_create_invalid_unit(self):
        """The unit ``"bushels"`` is expected to be rejected."""
        rejected_fields = {
            "normalized_product_id": uuid.uuid4(),
            "detected_date": date(2026, 3, 10),
            "old_size": "18",
            "new_size": "15.4",
            "old_unit": "bushels",
            "new_unit": SizeUnit.OZ,
        }
        with pytest.raises(ValidationError):
            ShrinkflationEventCreate(**rejected_fields)
class TestEventEnvelope:
    """Checks for the ``EventEnvelope`` schema's ``event_type`` field."""

    def test_valid_event(self):
        """An ``EventType`` member is accepted and kept on the envelope."""
        wanted_type = EventType.RECEIPTS_INGESTED
        envelope = EventEnvelope(
            event_type=wanted_type,
            timestamp=datetime.now(UTC),
            service="receiptwitness",
            payload={"receipt_id": "RCP-001"},
        )
        assert envelope.event_type == wanted_type

    def test_invalid_event_type(self):
        """The event type ``"invalid.event"`` is expected to be rejected."""
        rejected_fields = {
            "event_type": "invalid.event",
            "timestamp": datetime.now(UTC),
            "service": "test",
            "payload": {},
        }
        with pytest.raises(ValidationError):
            EventEnvelope(**rejected_fields)
+357
View File
@@ -0,0 +1,357 @@
"""Tests for the seed data generator."""
import random
from faker import Faker
from cartsnitch_common.seed.config import (
NUM_ACTIVE_USERS,
NUM_COUPONS,
NUM_PRICE_HISTORY,
NUM_PRODUCTS,
NUM_PURCHASE_ITEMS,
NUM_PURCHASES,
NUM_SHRINKFLATION_EVENTS,
NUM_STORES,
NUM_USERS,
SEED_END_DATE,
SEED_START_DATE,
SEED_VALUE,
)
from cartsnitch_common.seed.generators.coupons import generate_coupons
from cartsnitch_common.seed.generators.prices import generate_price_history
from cartsnitch_common.seed.generators.products import generate_products
from cartsnitch_common.seed.generators.purchases import generate_purchase_items, generate_purchases
from cartsnitch_common.seed.generators.shrinkflation import generate_shrinkflation_events
from cartsnitch_common.seed.generators.stores import generate_store_locations, generate_stores
from cartsnitch_common.seed.generators.users import generate_users
def _seed() -> None:
    """Reseed both ``random`` and Faker with ``SEED_VALUE``."""
    # Same order as before: the stdlib RNG first, then Faker's.
    for reseed in (random.seed, Faker.seed):
        reseed(SEED_VALUE)
def _make_fake() -> Faker:
    """Return a new ``Faker`` instance built with no arguments."""
    fake = Faker()
    return fake
# ---------------------------------------------------------------------------
# Stores
# ---------------------------------------------------------------------------
def test_generate_stores_count() -> None:
    """``generate_stores`` returns exactly ``NUM_STORES`` entries."""
    assert len(generate_stores()) == NUM_STORES
def test_generate_stores_deterministic() -> None:
    """Two calls to ``generate_stores`` give the same set of slugs."""
    first_run = generate_stores()
    second_run = generate_stores()
    # Stores are fixed (no RNG), so the slug sets must match without seeding.
    first_slugs, second_slugs = (
        {store["slug"] for store in run} for run in (first_run, second_run)
    )
    assert first_slugs == second_slugs
def test_generate_store_locations_count() -> None:
    """The location generator returns 15 locations for the generated stores."""
    expected_total = 15  # 3 stores * 5 locations
    locations = generate_store_locations(generate_stores())
    assert len(locations) == expected_total
def test_generate_store_locations_fk() -> None:
    """Every location's ``store_id`` refers to a generated store."""
    stores = generate_stores()
    locations = generate_store_locations(stores)
    known_store_ids = {store["id"] for store in stores}
    for location in locations:
        owner_id = location["store_id"]
        assert owner_id in known_store_ids
# ---------------------------------------------------------------------------
# Users
# ---------------------------------------------------------------------------
def test_generate_users_count() -> None:
    """``generate_users`` returns exactly ``NUM_USERS`` entries."""
    _seed()
    generated = generate_users(_make_fake())
    assert len(generated) == NUM_USERS
def test_generate_users_active_count() -> None:
    """The number of users with a truthy ``_active`` is ``NUM_ACTIVE_USERS``."""
    _seed()
    generated = generate_users(_make_fake())
    active_total = sum(1 for user in generated if user["_active"])
    assert active_total == NUM_ACTIVE_USERS
def test_generate_users_deterministic() -> None:
    """Reseeding before each run yields the same emails in the same order."""
    runs = []
    for _ in range(2):
        # Reseed and build a new Faker so each run starts from the same state.
        _seed()
        runs.append(generate_users(_make_fake()))
    first_run, second_run = runs
    # Emails should match (same seed → same Faker output)
    first_emails = [user["email"] for user in first_run]
    second_emails = [user["email"] for user in second_run]
    assert first_emails == second_emails
def test_generate_users_unique_emails() -> None:
    """No two generated users share an email address."""
    _seed()
    generated = generate_users(_make_fake())
    all_emails = [user["email"] for user in generated]
    distinct_emails = set(all_emails)
    # Duplicates would collapse in the set and make it shorter than the list.
    assert len(all_emails) == len(distinct_emails)
# ---------------------------------------------------------------------------
# Products
# ---------------------------------------------------------------------------
def test_generate_products_count() -> None:
    """``generate_products`` returns exactly ``NUM_PRODUCTS`` entries."""
    _seed()
    generated = generate_products(_make_fake())
    assert len(generated) == NUM_PRODUCTS
def test_generate_products_deterministic() -> None:
    """Reseeding before each run yields the same canonical names in order."""
    runs = []
    for _ in range(2):
        # Reseed and build a new Faker so each run starts from the same state.
        _seed()
        runs.append(generate_products(_make_fake()))
    first_run, second_run = runs
    first_names = [product["canonical_name"] for product in first_run]
    second_names = [product["canonical_name"] for product in second_run]
    assert first_names == second_names
def test_generate_products_have_categories() -> None:
    """Every generated product has a non-``None`` category."""
    _seed()
    generated = generate_products(_make_fake())
    for entry in generated:
        category = entry["category"]
        assert category is not None
def test_generate_products_have_upc_variants() -> None:
    """Every generated product has a non-empty list of UPC variants."""
    _seed()
    generated = generate_products(_make_fake())
    for entry in generated:
        variants = entry["upc_variants"]
        assert variants
        assert isinstance(variants, list)
        assert len(variants) >= 1
# ---------------------------------------------------------------------------
# Purchases
# ---------------------------------------------------------------------------
def test_generate_purchases_count() -> None:
    """``generate_purchases`` returns exactly ``NUM_PURCHASES`` entries."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    assert len(purchase_rows) == NUM_PURCHASES
def test_generate_purchases_fk() -> None:
    """Every purchase refers to a generated user and a generated store."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    known_user_ids = {user["id"] for user in user_rows}
    known_store_ids = {store["id"] for store in store_rows}
    for purchase in purchase_rows:
        assert purchase["user_id"] in known_user_ids
        assert purchase["store_id"] in known_store_ids
def test_generate_purchase_items_count() -> None:
    """The purchase-item count lands within 20% of ``NUM_PURCHASE_ITEMS``."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    item_rows = generate_purchase_items(purchase_rows, product_rows)
    # Should be close to target (within 20%)
    deviation = abs(len(item_rows) - NUM_PURCHASE_ITEMS)
    tolerance = NUM_PURCHASE_ITEMS * 0.20
    assert deviation < tolerance
def test_generate_purchase_items_fk() -> None:
    """Every purchase item refers to a generated purchase and product."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    item_rows = generate_purchase_items(purchase_rows, product_rows)
    known_purchase_ids = {purchase["id"] for purchase in purchase_rows}
    known_product_ids = {product["id"] for product in product_rows}
    for line_item in item_rows:
        assert line_item["purchase_id"] in known_purchase_ids
        assert line_item["normalized_product_id"] in known_product_ids
# ---------------------------------------------------------------------------
# Price History
# ---------------------------------------------------------------------------
def test_generate_price_history_count() -> None:
    """The price-history count lands within 10% of ``NUM_PRICE_HISTORY``."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    item_rows = generate_purchase_items(purchase_rows, product_rows)
    price_rows = generate_price_history(product_rows, store_rows, item_rows)
    # Should be within 10% of target
    deviation = abs(len(price_rows) - NUM_PRICE_HISTORY)
    tolerance = NUM_PRICE_HISTORY * 0.10
    assert deviation < tolerance
def test_generate_price_history_fk() -> None:
    """Each price row points at a known product and store with a price > 0."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    item_rows = generate_purchase_items(purchase_rows, product_rows)
    price_rows = generate_price_history(product_rows, store_rows, item_rows)
    known_product_ids = {product["id"] for product in product_rows}
    known_store_ids = {store["id"] for store in store_rows}
    for observation in price_rows:
        assert observation["normalized_product_id"] in known_product_ids
        assert observation["store_id"] in known_store_ids
        assert observation["regular_price"] > 0
def test_price_history_dates_in_range() -> None:
    """Every observed date lies in ``[SEED_START_DATE, SEED_END_DATE]``."""
    _seed()
    faker = _make_fake()
    # Generators run in the original order so seeded RNG use is unchanged.
    store_rows = generate_stores()
    location_rows = generate_store_locations(store_rows)
    user_rows = generate_users(faker)
    purchase_rows = generate_purchases(user_rows, store_rows, location_rows)
    product_rows = generate_products(faker)
    item_rows = generate_purchase_items(purchase_rows, product_rows)
    price_rows = generate_price_history(product_rows, store_rows, item_rows)
    for observation in price_rows:
        observed = observation["observed_date"]
        assert SEED_START_DATE <= observed and observed <= SEED_END_DATE
# ---------------------------------------------------------------------------
# Coupons
# ---------------------------------------------------------------------------
def test_generate_coupons_count() -> None:
    """``generate_coupons`` returns exactly ``NUM_COUPONS`` entries."""
    _seed()
    faker = _make_fake()
    store_rows = generate_stores()
    product_rows = generate_products(faker)
    coupon_rows = generate_coupons(faker, product_rows, store_rows)
    assert len(coupon_rows) == NUM_COUPONS
def test_generate_coupons_mix() -> None:
    """Verify ~60% expired and ~40% active."""
    _seed()
    faker = _make_fake()
    store_rows = generate_stores()
    product_rows = generate_products(faker)
    coupon_rows = generate_coupons(faker, product_rows, store_rows)
    # A coupon counts as expired when it stops being valid before the end of
    # the seed window, and as active otherwise.
    expired_total = sum(
        1 for coupon in coupon_rows if coupon["valid_to"] < SEED_END_DATE
    )
    active_total = sum(
        1 for coupon in coupon_rows if coupon["valid_to"] >= SEED_END_DATE
    )
    # Allow ±15% variance from target
    expired_share = expired_total / NUM_COUPONS
    active_share = active_total / NUM_COUPONS
    assert expired_share > 0.45
    assert active_share > 0.25
# ---------------------------------------------------------------------------
# Shrinkflation
# ---------------------------------------------------------------------------
def test_generate_shrinkflation_count() -> None:
    """The generator returns exactly ``NUM_SHRINKFLATION_EVENTS`` events."""
    _seed()
    product_rows = generate_products(_make_fake())
    event_rows = generate_shrinkflation_events(product_rows)
    assert len(event_rows) == NUM_SHRINKFLATION_EVENTS
def test_generate_shrinkflation_fk() -> None:
    """Every shrinkflation event refers to a generated product."""
    _seed()
    product_rows = generate_products(_make_fake())
    event_rows = generate_shrinkflation_events(product_rows)
    known_product_ids = {product["id"] for product in product_rows}
    for shrink_event in event_rows:
        referenced_id = shrink_event["normalized_product_id"]
        assert referenced_id in known_product_ids
def test_generate_shrinkflation_price_held_or_increased() -> None:
    """Validate shrinkflation: new_size < old_size, price maintained or up.

    Every generated event must shrink the package. When both prices are
    recorded, the new price may be at most 5% below the old one.
    """
    _seed()
    fake = _make_fake()
    products = generate_products(fake)
    events = generate_shrinkflation_events(products)
    for event in events:
        old_size = float(event["old_size"])
        new_size = float(event["new_size"])
        assert new_size < old_size, f"Expected size reduction: {old_size} -> {new_size}"
        old_price = event["price_at_old_size"]
        new_price = event["price_at_new_size"]
        # Compare against None explicitly: a truthiness check would also skip
        # a recorded price of 0, so a price that fell to zero went unchecked.
        if old_price is not None and new_price is not None:
            # Price should be maintained or increased (not significantly dropped)
            assert float(new_price) >= float(old_price) * 0.95
def test_generate_shrinkflation_confidence_range() -> None:
    """Every event's confidence, read as a float, lies in ``[0, 1.0]``."""
    _seed()
    product_rows = generate_products(_make_fake())
    event_rows = generate_shrinkflation_events(product_rows)
    for shrink_event in event_rows:
        confidence = float(shrink_event["confidence"])
        assert 0 <= confidence and confidence <= 1.0
# ---------------------------------------------------------------------------
# Dry-run smoke test
# ---------------------------------------------------------------------------
def test_dry_run_does_not_raise() -> None:
    """Smoke test: ``run_seed`` in dry-run mode finishes without raising."""
    # Imported inside the test, as before, so the runner module is only
    # loaded when this test executes.
    from cartsnitch_common.seed.runner import run_seed

    options = {"dry_run": True, "seed_value": SEED_VALUE}
    run_seed(**options)