f47da487da
Extract receiptwitness/ from the monorepo into cartsnitch/receiptwitness. Inline the consumed modules from cartsnitch-common so there is no cross-repo dependency. - Add src/receiptwitness/shared/ with inlined models, schemas, constants, database - Update all imports from cartsnitch_common to receiptwitness.shared - Remove cartsnitch-common dependency from pyproject.toml - Copy and update Alembic config (alembic.ini, alembic/) - Update Dockerfile for standalone build context, add migration CMD - Add CI workflow with lint, test, build, grype scan, deploy-dev, deploy-uat - Add .grype.yaml Co-Authored-By: Paperclip <noreply@paperclip.ing>
162 lines
5.6 KiB
Python
162 lines
5.6 KiB
Python
"""Tests for product matching & dedup pipeline."""
|
|
|
|
import uuid
|
|
from datetime import UTC, datetime
|
|
from decimal import Decimal
|
|
|
|
from receiptwitness.shared.constants import MatchConfidence
|
|
from receiptwitness.shared.models import NormalizedProduct
|
|
from receiptwitness.shared.schemas import PurchaseItemCreate
|
|
|
|
from receiptwitness.pipeline.matching import (
|
|
ProductMatcher,
|
|
classify_confidence,
|
|
match_purchase_item,
|
|
)
|
|
from receiptwitness.pipeline.normalization import MatchMethod
|
|
|
|
|
|
class TestClassifyConfidence:
|
|
def test_upc_always_high(self):
|
|
assert classify_confidence(1.0, MatchMethod.UPC) == MatchConfidence.HIGH
|
|
assert classify_confidence(0.5, MatchMethod.UPC) == MatchConfidence.HIGH
|
|
|
|
def test_name_high(self):
|
|
assert classify_confidence(0.9, MatchMethod.NAME) == MatchConfidence.HIGH
|
|
assert classify_confidence(0.8, MatchMethod.NAME) == MatchConfidence.HIGH
|
|
|
|
def test_name_medium(self):
|
|
assert classify_confidence(0.6, MatchMethod.NAME) == MatchConfidence.MEDIUM
|
|
assert classify_confidence(0.5, MatchMethod.NAME) == MatchConfidence.MEDIUM
|
|
|
|
def test_name_low(self):
|
|
assert classify_confidence(0.3, MatchMethod.NAME) == MatchConfidence.LOW
|
|
assert classify_confidence(0.0, MatchMethod.NAME) == MatchConfidence.LOW
|
|
|
|
|
|
class TestProductMatcher:
|
|
def _make_item(self, name: str, upc: str | None = None) -> PurchaseItemCreate:
|
|
return PurchaseItemCreate(
|
|
product_name_raw=name,
|
|
upc=upc,
|
|
unit_price=Decimal("3.99"),
|
|
extended_price=Decimal("3.99"),
|
|
)
|
|
|
|
def test_match_by_upc(self, session):
|
|
product = NormalizedProduct(
|
|
id=uuid.uuid4(),
|
|
canonical_name="Whole Milk Gallon",
|
|
upc_variants=["041250000001"],
|
|
created_at=datetime.now(UTC),
|
|
updated_at=datetime.now(UTC),
|
|
)
|
|
session.add(product)
|
|
session.commit()
|
|
|
|
matcher = ProductMatcher(session)
|
|
item = self._make_item("Kroger Milk", upc="041250000001")
|
|
prod, result, confidence = matcher.match_single(item)
|
|
|
|
assert prod is not None
|
|
assert prod.id == product.id
|
|
assert result is not None
|
|
assert result.method == MatchMethod.UPC
|
|
assert confidence == MatchConfidence.HIGH
|
|
|
|
def test_match_by_name(self, session):
|
|
product = NormalizedProduct(
|
|
id=uuid.uuid4(),
|
|
canonical_name="Whole Milk Gallon",
|
|
created_at=datetime.now(UTC),
|
|
updated_at=datetime.now(UTC),
|
|
)
|
|
session.add(product)
|
|
session.commit()
|
|
|
|
matcher = ProductMatcher(session, name_threshold=0.3)
|
|
item = self._make_item("Whole Milk Gallon Size")
|
|
prod, result, confidence = matcher.match_single(item)
|
|
|
|
assert prod is not None
|
|
assert result is not None
|
|
assert result.method == MatchMethod.NAME
|
|
|
|
def test_auto_create_when_no_match(self, session):
|
|
matcher = ProductMatcher(session, auto_create=True)
|
|
item = self._make_item("Unique Product XYZ 16 oz")
|
|
prod, result, confidence = matcher.match_single(item)
|
|
|
|
assert prod is not None
|
|
assert result is None # No match found, was created
|
|
assert confidence == MatchConfidence.LOW
|
|
assert prod.canonical_name == "Unique Product XYZ 16 oz"
|
|
assert prod.size == "16"
|
|
assert prod.size_unit == "oz"
|
|
|
|
def test_no_create_when_disabled(self, session):
|
|
matcher = ProductMatcher(session, auto_create=False)
|
|
item = self._make_item("Nonexistent Product")
|
|
prod, result, confidence = matcher.match_single(item)
|
|
|
|
assert prod is None
|
|
assert result is None
|
|
|
|
def test_batch_match(self, session):
|
|
product = NormalizedProduct(
|
|
id=uuid.uuid4(),
|
|
canonical_name="Large Eggs 12 Count",
|
|
upc_variants=["012345"],
|
|
created_at=datetime.now(UTC),
|
|
updated_at=datetime.now(UTC),
|
|
)
|
|
session.add(product)
|
|
session.commit()
|
|
|
|
matcher = ProductMatcher(session)
|
|
items = [
|
|
self._make_item("Large Eggs", upc="012345"),
|
|
self._make_item("Brand New Never Seen Product"),
|
|
]
|
|
outcomes = matcher.match_items(items)
|
|
|
|
assert len(outcomes) == 2
|
|
assert outcomes[0].match is not None
|
|
assert outcomes[0].confidence_level == MatchConfidence.HIGH
|
|
assert outcomes[0].created_new is False
|
|
assert outcomes[1].match is None
|
|
assert outcomes[1].created_new is True
|
|
|
|
|
|
class TestMatchPurchaseItem:
|
|
def test_convenience_function(self, session):
|
|
product = NormalizedProduct(
|
|
id=uuid.uuid4(),
|
|
canonical_name="Ground Beef 80/20",
|
|
upc_variants=["999888"],
|
|
created_at=datetime.now(UTC),
|
|
updated_at=datetime.now(UTC),
|
|
)
|
|
session.add(product)
|
|
session.commit()
|
|
|
|
item = PurchaseItemCreate(
|
|
product_name_raw="Ground Beef",
|
|
upc="999888",
|
|
unit_price=Decimal("5.99"),
|
|
extended_price=Decimal("5.99"),
|
|
)
|
|
prod, confidence = match_purchase_item(session, item)
|
|
assert prod is not None
|
|
assert confidence == MatchConfidence.HIGH
|
|
|
|
def test_auto_create_default(self, session):
|
|
item = PurchaseItemCreate(
|
|
product_name_raw="Totally New Item",
|
|
unit_price=Decimal("1.00"),
|
|
extended_price=Decimal("1.00"),
|
|
)
|
|
prod, confidence = match_purchase_item(session, item)
|
|
assert prod is not None
|
|
assert confidence == MatchConfidence.LOW
|