Files
receiptwitness/tests/test_pipeline/test_matching.py
T
Barcode Betty f47da487da feat: migrate receiptwitness to standalone repo with inlined common
Extract receiptwitness/ from the monorepo into cartsnitch/receiptwitness.
Inline the consumed modules from cartsnitch-common so there is no
cross-repo dependency.

- Add src/receiptwitness/shared/ with inlined models, schemas, constants, database
- Update all imports from cartsnitch_common to receiptwitness.shared
- Remove cartsnitch-common dependency from pyproject.toml
- Copy and update Alembic config (alembic.ini, alembic/)
- Update Dockerfile for standalone build context, add migration CMD
- Add CI workflow with lint, test, build, grype scan, deploy-dev, deploy-uat
- Add .grype.yaml

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-19 12:18:11 +00:00

162 lines
5.6 KiB
Python

"""Tests for product matching & dedup pipeline."""
import uuid
from datetime import UTC, datetime
from decimal import Decimal
from receiptwitness.shared.constants import MatchConfidence
from receiptwitness.shared.models import NormalizedProduct
from receiptwitness.shared.schemas import PurchaseItemCreate
from receiptwitness.pipeline.matching import (
ProductMatcher,
classify_confidence,
match_purchase_item,
)
from receiptwitness.pipeline.normalization import MatchMethod
class TestClassifyConfidence:
    """Pin the confidence bucket ``classify_confidence`` returns per match method."""

    def test_upc_always_high(self):
        """A UPC match is expected to be HIGH for both a perfect and a middling score."""
        for score in (1.0, 0.5):
            assert classify_confidence(score, MatchMethod.UPC) == MatchConfidence.HIGH

    def test_name_high(self):
        """Name scores of 0.9 and 0.8 are expected to classify as HIGH."""
        for score in (0.9, 0.8):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.HIGH

    def test_name_medium(self):
        """Name scores of 0.6 and 0.5 are expected to classify as MEDIUM."""
        for score in (0.6, 0.5):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.MEDIUM

    def test_name_low(self):
        """Name scores of 0.3 and 0.0 are expected to classify as LOW."""
        for score in (0.3, 0.0):
            assert classify_confidence(score, MatchMethod.NAME) == MatchConfidence.LOW
class TestProductMatcher:
    """Exercise ``ProductMatcher.match_single`` and ``match_items``.

    Every test receives a ``session`` fixture that is defined outside this
    file (presumably a database session from conftest -- confirm there).
    """

    def _make_item(self, name: str, upc: str | None = None) -> PurchaseItemCreate:
        """Build a purchase item priced at 3.99 with the given raw name and optional UPC."""
        return PurchaseItemCreate(
            product_name_raw=name,
            upc=upc,
            unit_price=Decimal("3.99"),
            extended_price=Decimal("3.99"),
        )

    def _add_product(
        self,
        session,
        canonical_name: str,
        upc_variants: list[str] | None = None,
    ) -> NormalizedProduct:
        """Store a ``NormalizedProduct`` through ``session`` and return it.

        ``upc_variants`` is only passed to the model when the caller supplies
        it, so a product seeded without UPCs keeps whatever default the model
        declares.
        """
        now = datetime.now(UTC)
        fields = {
            "id": uuid.uuid4(),
            "canonical_name": canonical_name,
            "created_at": now,
            "updated_at": now,
        }
        if upc_variants is not None:
            fields["upc_variants"] = upc_variants
        product = NormalizedProduct(**fields)
        session.add(product)
        session.commit()
        return product

    def test_match_by_upc(self, session):
        """An item whose UPC is among a product's variants matches that product via UPC."""
        product = self._add_product(
            session, "Whole Milk Gallon", upc_variants=["041250000001"]
        )

        matcher = ProductMatcher(session)
        # The raw name differs from the canonical name; only the UPC lines up.
        item = self._make_item("Kroger Milk", upc="041250000001")
        prod, result, confidence = matcher.match_single(item)

        assert prod is not None
        assert prod.id == product.id
        assert result is not None
        assert result.method == MatchMethod.UPC
        assert confidence == MatchConfidence.HIGH

    def test_match_by_name(self, session):
        """Without a UPC, a similar raw name is expected to match via the NAME method."""
        self._add_product(session, "Whole Milk Gallon")

        matcher = ProductMatcher(session, name_threshold=0.3)
        item = self._make_item("Whole Milk Gallon Size")
        # The confidence level is not part of this test's contract.
        prod, result, _ = matcher.match_single(item)

        assert prod is not None
        assert result is not None
        assert result.method == MatchMethod.NAME

    def test_auto_create_when_no_match(self, session):
        """With ``auto_create=True`` an unmatched item yields a new LOW-confidence product."""
        matcher = ProductMatcher(session, auto_create=True)
        item = self._make_item("Unique Product XYZ 16 oz")
        prod, result, confidence = matcher.match_single(item)

        assert prod is not None
        assert result is None  # No match found, was created
        assert confidence == MatchConfidence.LOW
        assert prod.canonical_name == "Unique Product XYZ 16 oz"
        # The trailing "16 oz" in the raw name is expected to populate size fields.
        assert prod.size == "16"
        assert prod.size_unit == "oz"

    def test_no_create_when_disabled(self, session):
        """With ``auto_create=False`` an unmatched item yields neither product nor match."""
        matcher = ProductMatcher(session, auto_create=False)
        item = self._make_item("Nonexistent Product")
        # The confidence level is not asserted for the no-match, no-create case.
        prod, result, _ = matcher.match_single(item)

        assert prod is None
        assert result is None

    def test_batch_match(self, session):
        """``match_items`` returns one outcome per item, in input order."""
        self._add_product(session, "Large Eggs 12 Count", upc_variants=["012345"])

        matcher = ProductMatcher(session)
        items = [
            self._make_item("Large Eggs", upc="012345"),
            self._make_item("Brand New Never Seen Product"),
        ]
        outcomes = matcher.match_items(items)

        assert len(outcomes) == 2
        # First item: UPC hit against the seeded product.
        assert outcomes[0].match is not None
        assert outcomes[0].confidence_level == MatchConfidence.HIGH
        assert outcomes[0].created_new is False
        # Second item: nothing to match, so a new product is expected.
        assert outcomes[1].match is None
        assert outcomes[1].created_new is True
class TestMatchPurchaseItem:
    """Cover the ``match_purchase_item`` convenience function."""

    def test_convenience_function(self, session):
        """An item carrying a seeded UPC yields a product and HIGH confidence."""
        seed_fields = {
            "id": uuid.uuid4(),
            "canonical_name": "Ground Beef 80/20",
            "upc_variants": ["999888"],
            "created_at": datetime.now(UTC),
            "updated_at": datetime.now(UTC),
        }
        seeded = NormalizedProduct(**seed_fields)
        session.add(seeded)
        session.commit()

        purchase = PurchaseItemCreate(
            product_name_raw="Ground Beef",
            upc="999888",
            unit_price=Decimal("5.99"),
            extended_price=Decimal("5.99"),
        )
        matched, level = match_purchase_item(session, purchase)

        assert matched is not None
        assert level == MatchConfidence.HIGH

    def test_auto_create_default(self, session):
        """An item this test seeds nothing for still yields a product, at LOW confidence.

        NOTE(review): the test name implies the product is auto-created by
        default; confirm against ``match_purchase_item``.
        """
        item_fields = {
            "product_name_raw": "Totally New Item",
            "unit_price": Decimal("1.00"),
            "extended_price": Decimal("1.00"),
        }
        purchase = PurchaseItemCreate(**item_fields)
        matched, level = match_purchase_item(session, purchase)

        assert matched is not None
        assert level == MatchConfidence.LOW