forked from cartsnitch/cartsnitch
205 lines
7.0 KiB
Python
205 lines
7.0 KiB
Python
"""Tests for receipt normalization pipeline."""
|
|
|
|
import uuid
|
|
from datetime import date
|
|
from decimal import Decimal
|
|
|
|
from cartsnitch_common.pipeline.receipt import (
|
|
_clean_product_name,
|
|
_safe_decimal,
|
|
normalize_receipt,
|
|
parse_meijer_item,
|
|
)
|
|
|
|
|
|
class TestCleanProductName:
    """Checks for the ``_clean_product_name`` helper."""

    def test_strips_whitespace(self):
        """Spaces padding the name on either side are removed."""
        assert _clean_product_name(" Milk ") == "Milk"

    def test_removes_leading_punctuation(self):
        """Dashes wrapped around the name are dropped on both ends."""
        assert _clean_product_name("---Milk---") == "Milk"

    def test_collapses_internal_whitespace(self):
        """Runs of spaces between words shrink to a single space."""
        # The input has to contain *repeated* spaces; with single spaces the
        # assertion would pass even if no collapsing happened at all.
        assert _clean_product_name("Whole   Milk    Gallon") == "Whole Milk Gallon"

    def test_empty_string(self):
        """An empty name comes back as an empty name."""
        assert _clean_product_name("") == ""
|
|
|
|
|
|
class TestSafeDecimal:
    """Checks for ``_safe_decimal`` across the kinds of input it is handed."""

    def test_string_input(self):
        """A numeric string converts to the equal Decimal."""
        converted = _safe_decimal("3.99")
        assert converted == Decimal("3.99")

    def test_float_input(self):
        """A float converts to the Decimal of its printed value."""
        converted = _safe_decimal(3.99)
        assert converted == Decimal("3.99")

    def test_int_input(self):
        """An int converts to the equal Decimal."""
        converted = _safe_decimal(4)
        assert converted == Decimal("4")

    def test_none_returns_default(self):
        """``None`` with no explicit fallback yields zero."""
        fallback = _safe_decimal(None)
        assert fallback == Decimal("0")

    def test_none_custom_default(self):
        """``None`` with an explicit fallback yields that fallback."""
        custom = Decimal("1")
        assert _safe_decimal(None, custom) == Decimal("1")

    def test_invalid_returns_default(self):
        """Unparseable text yields zero."""
        fallback = _safe_decimal("not-a-number")
        assert fallback == Decimal("0")

    def test_decimal_passthrough(self):
        """A Decimal argument comes back with an equal value."""
        amount = Decimal("5.50")
        assert _safe_decimal(amount) == Decimal("5.50")
|
|
|
|
|
|
class TestParseMeijerItem:
    """Checks for ``parse_meijer_item`` against raw line-item dictionaries."""

    def test_basic_item(self):
        """Primary field names map onto the parsed item."""
        payload = {
            "description": "Kroger Whole Milk 1 Gallon",
            "upc": "0041250000001",
            "quantity": 1,
            "unitPrice": "3.99",
            "extendedPrice": "3.99",
            "category": "DAIRY",
        }
        parsed = parse_meijer_item(payload)
        expectations = (
            ("product_name_raw", "Kroger Whole Milk 1 Gallon"),
            ("upc", "41250000001"),  # leading zeros stripped
            ("quantity", Decimal("1")),
            ("unit_price", Decimal("3.99")),
            ("extended_price", Decimal("3.99")),
            ("category_raw", "DAIRY"),
        )
        for attr, wanted in expectations:
            assert getattr(parsed, attr) == wanted, attr

    def test_alternate_field_names(self):
        """The alternate spellings of each key are accepted as well."""
        payload = {
            "name": "Eggs Large 12 ct",
            "upcCode": "012345",
            "qty": 2,
            "price": "4.50",
            "totalPrice": "9.00",
            "department": "EGGS",
        }
        parsed = parse_meijer_item(payload)
        expectations = (
            ("product_name_raw", "Eggs Large 12 ct"),
            ("upc", "12345"),
            ("quantity", Decimal("2")),
            ("unit_price", Decimal("4.50")),
            ("extended_price", Decimal("9.00")),
            ("category_raw", "EGGS"),
        )
        for attr, wanted in expectations:
            assert getattr(parsed, attr) == wanted, attr

    def test_calculates_extended_from_unit_price(self):
        """With no extended price given, it equals unit price times quantity."""
        payload = {
            "description": "Bananas",
            "unitPrice": "0.59",
            "quantity": 3,
        }
        line_total = parse_meijer_item(payload).extended_price
        assert line_total == Decimal("1.77")

    def test_discounts_parsed(self):
        """Regular/sale prices and both discount amounts are picked up."""
        payload = {
            "description": "Cereal",
            "unitPrice": "4.99",
            "extendedPrice": "4.99",
            "regularPrice": "5.99",
            "salePrice": "4.99",
            "couponAmount": "1.00",
            "loyaltyAmount": "0.50",
        }
        parsed = parse_meijer_item(payload)
        expectations = (
            ("regular_price", Decimal("5.99")),
            ("sale_price", Decimal("4.99")),
            ("coupon_discount", Decimal("1.00")),
            ("loyalty_discount", Decimal("0.50")),
        )
        for attr, wanted in expectations:
            assert getattr(parsed, attr) == wanted, attr

    def test_alternate_discount_names(self):
        """The ``*Discount`` key spellings feed the same discount fields."""
        payload = {
            "description": "Bread",
            "unitPrice": "2.99",
            "extendedPrice": "2.99",
            "couponDiscount": "0.75",
            "loyaltyDiscount": "0.25",
        }
        parsed = parse_meijer_item(payload)
        coupon, loyalty = parsed.coupon_discount, parsed.loyalty_discount
        assert coupon == Decimal("0.75")
        assert loyalty == Decimal("0.25")

    def test_missing_fields_default_gracefully(self):
        """A description-only payload still parses, with fallbacks filled in."""
        parsed = parse_meijer_item({"description": "Mystery Item"})
        assert parsed.product_name_raw == "Mystery Item"
        assert parsed.upc is None
        assert parsed.quantity == Decimal("1")
        assert parsed.unit_price == Decimal("0")
        assert parsed.regular_price is None
        assert parsed.category_raw is None

    def test_no_upc_returns_none(self):
        """A payload without any UPC key leaves ``upc`` as ``None``."""
        payload = {
            "description": "Loose Bananas",
            "unitPrice": "1.00",
            "extendedPrice": "1.00",
        }
        parsed = parse_meijer_item(payload)
        assert parsed.upc is None
|
|
|
|
|
|
class TestNormalizeReceipt:
    """Checks for ``normalize_receipt`` against raw receipt dictionaries."""

    @staticmethod
    def _new_ids():
        """Return a fresh ``(user_id, store_id)`` pair of UUID strings."""
        return str(uuid.uuid4()), str(uuid.uuid4())

    def test_full_receipt(self):
        """Primary field names map onto the purchase, items included."""
        user_id, store_id = self._new_ids()
        raw = {
            "receiptId": "REC-001",
            "date": "2026-03-15",
            "total": "25.47",
            "subtotal": "23.00",
            "tax": "2.47",
            "savings": "3.00",
            "items": [
                {"description": "Milk", "unitPrice": "3.99", "extendedPrice": "3.99"},
                {"description": "Bread", "unitPrice": "2.50", "extendedPrice": "2.50"},
            ],
        }
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id == "REC-001"
        assert purchase.purchase_date == date(2026, 3, 15)
        assert purchase.total == Decimal("25.47")
        assert purchase.subtotal == Decimal("23.00")
        assert purchase.tax == Decimal("2.47")
        assert purchase.savings_total == Decimal("3.00")
        assert len(purchase.items) == 2
        assert purchase.items[0].product_name_raw == "Milk"
        assert purchase.raw_data == raw

    def test_alternate_receipt_fields(self):
        """The alternate spellings of the receipt keys are accepted."""
        user_id, store_id = self._new_ids()
        raw = {
            "receipt_id": "REC-002",
            "purchaseDate": "2026-03-14",
            "totalAmount": "10.00",
            "taxAmount": "0.75",
            "totalSavings": "1.50",
            "items": [],
        }
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id == "REC-002"
        assert purchase.purchase_date == date(2026, 3, 14)
        assert purchase.total == Decimal("10.00")
        assert purchase.tax == Decimal("0.75")
        assert purchase.savings_total == Decimal("1.50")

    def test_missing_date_defaults_to_today(self):
        """A receipt without a date is stamped with the current date."""
        user_id, store_id = self._new_ids()
        raw = {"total": "5.00", "items": []}
        # Sample the clock on both sides of the call so the test cannot fail
        # spuriously when the day rolls over between the call and the check.
        earliest = date.today()
        purchase = normalize_receipt(raw, user_id, store_id)
        latest = date.today()
        assert earliest <= purchase.purchase_date <= latest

    def test_generates_receipt_id_if_missing(self):
        """A receipt without an id still ends up with a non-empty one."""
        user_id, store_id = self._new_ids()
        raw = {"total": "5.00", "date": "2026-03-15", "items": []}
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.receipt_id  # Should be a generated UUID string

    def test_date_object_passthrough(self):
        """A ``date`` instance supplied as the date is kept as that date."""
        user_id, store_id = self._new_ids()
        raw = {"date": date(2026, 1, 1), "total": "5.00", "items": []}
        purchase = normalize_receipt(raw, user_id, store_id)
        assert purchase.purchase_date == date(2026, 1, 1)
|