Files
cartsnitch-fork-test/common/tests/test_pipeline_matching.py
T

161 lines
5.6 KiB
Python

"""Tests for product matching & dedup pipeline."""
import uuid
from datetime import UTC, datetime
from decimal import Decimal
from cartsnitch_common.constants import MatchConfidence
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.normalization import MatchMethod
from cartsnitch_common.pipeline.matching import (
ProductMatcher,
classify_confidence,
match_purchase_item,
)
from cartsnitch_common.schemas.purchase import PurchaseItemCreate
class TestClassifyConfidence:
    """Checks the confidence level ``classify_confidence`` assigns per score and method."""

    def test_upc_always_high(self):
        """A UPC match is rated HIGH at both sampled scores (1.0 and 0.5)."""
        for score in (1.0, 0.5):
            level = classify_confidence(score, MatchMethod.UPC)
            assert level == MatchConfidence.HIGH

    def test_name_high(self):
        """Name-match scores of 0.9 and 0.8 are rated HIGH."""
        for score in (0.9, 0.8):
            level = classify_confidence(score, MatchMethod.NAME)
            assert level == MatchConfidence.HIGH

    def test_name_medium(self):
        """Name-match scores of 0.6 and 0.5 are rated MEDIUM."""
        for score in (0.6, 0.5):
            level = classify_confidence(score, MatchMethod.NAME)
            assert level == MatchConfidence.MEDIUM

    def test_name_low(self):
        """Name-match scores of 0.3 and 0.0 are rated LOW."""
        for score in (0.3, 0.0):
            level = classify_confidence(score, MatchMethod.NAME)
            assert level == MatchConfidence.LOW
class TestProductMatcher:
    """Exercises ``ProductMatcher`` using products saved through the ``session`` fixture."""

    def _make_item(self, name: str, upc: str | None = None) -> PurchaseItemCreate:
        """Build a purchase item priced at 3.99 with the given raw name and optional UPC."""
        price = Decimal("3.99")
        return PurchaseItemCreate(
            product_name_raw=name,
            upc=upc,
            unit_price=price,
            extended_price=price,
        )

    @staticmethod
    def _store(session, **fields) -> NormalizedProduct:
        """Add and commit a ``NormalizedProduct`` with a fresh id and current timestamps.

        ``fields`` supplies the remaining constructor keywords, such as
        ``canonical_name`` or ``upc_variants``.
        """
        record = NormalizedProduct(
            id=uuid.uuid4(),
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
            **fields,
        )
        session.add(record)
        session.commit()
        return record

    def test_match_by_upc(self, session):
        """An item whose UPC is in a stored product's variants matches it via UPC, HIGH."""
        stored = self._store(
            session,
            canonical_name="Whole Milk Gallon",
            upc_variants=["041250000001"],
        )
        purchase = self._make_item("Kroger Milk", upc="041250000001")

        matched, outcome, level = ProductMatcher(session).match_single(purchase)

        assert matched is not None
        assert matched.id == stored.id
        assert outcome is not None
        assert outcome.method == MatchMethod.UPC
        assert level == MatchConfidence.HIGH

    def test_match_by_name(self, session):
        """With a 0.3 name threshold, a similar raw name yields a NAME-method match."""
        self._store(session, canonical_name="Whole Milk Gallon")
        purchase = self._make_item("Whole Milk Gallon Size")
        engine = ProductMatcher(session, name_threshold=0.3)

        matched, outcome, _level = engine.match_single(purchase)

        assert matched is not None
        assert outcome is not None
        assert outcome.method == MatchMethod.NAME

    def test_auto_create_when_no_match(self, session):
        """With ``auto_create=True`` an unmatched item comes back as a new LOW product."""
        purchase = self._make_item("Unique Product XYZ 16 oz")
        engine = ProductMatcher(session, auto_create=True)

        created, outcome, level = engine.match_single(purchase)

        assert created is not None
        # A missing match result signals the product was created, not matched.
        assert outcome is None
        assert level == MatchConfidence.LOW
        assert created.canonical_name == "Unique Product XYZ 16 oz"
        assert created.size == "16"
        assert created.size_unit == "oz"

    def test_no_create_when_disabled(self, session):
        """With ``auto_create=False`` an unmatched item yields no product and no result."""
        purchase = self._make_item("Nonexistent Product")
        engine = ProductMatcher(session, auto_create=False)

        matched, outcome, _level = engine.match_single(purchase)

        assert matched is None
        assert outcome is None

    def test_batch_match(self, session):
        """``match_items`` returns one outcome per item: a UPC hit, then a new product."""
        self._store(
            session,
            canonical_name="Large Eggs 12 Count",
            upc_variants=["012345"],
        )
        batch = [
            self._make_item("Large Eggs", upc="012345"),
            self._make_item("Brand New Never Seen Product"),
        ]

        outcomes = ProductMatcher(session).match_items(batch)

        assert len(outcomes) == 2

        known = outcomes[0]
        assert known.match is not None
        assert known.confidence_level == MatchConfidence.HIGH
        assert known.created_new is False

        fresh = outcomes[1]
        assert fresh.match is None
        assert fresh.created_new is True
class TestMatchPurchaseItem:
    """Covers the ``match_purchase_item`` convenience function."""

    def test_convenience_function(self, session):
        """An item whose UPC is in a stored product's variants gets a HIGH product."""
        stored = NormalizedProduct(
            id=uuid.uuid4(),
            canonical_name="Ground Beef 80/20",
            upc_variants=["999888"],
            created_at=datetime.now(UTC),
            updated_at=datetime.now(UTC),
        )
        session.add(stored)
        session.commit()

        price = Decimal("5.99")
        purchase = PurchaseItemCreate(
            product_name_raw="Ground Beef",
            upc="999888",
            unit_price=price,
            extended_price=price,
        )

        matched, level = match_purchase_item(session, purchase)

        assert matched is not None
        assert level == MatchConfidence.HIGH

    def test_auto_create_default(self, session):
        """Called with defaults, an item with no UPC still gets a product, rated LOW."""
        price = Decimal("1.00")
        purchase = PurchaseItemCreate(
            product_name_raw="Totally New Item",
            unit_price=price,
            extended_price=price,
        )

        created, level = match_purchase_item(session, purchase)

        assert created is not None
        assert level == MatchConfidence.LOW