Compare commits

..

13 Commits

Author SHA1 Message Date
Paperclip 1aff898545 fix: update vite to 6.4.2 to patch audit vulnerabilities
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 14:31:02 +00:00
CartSnitch Engineer Bot 24f0dd0e67 fix: replace N+1 UPC query with SQL containment in normalization
- Add PostgreSQL JSONB containment (@>) query for match_by_upc
- Add SQLite LIKE fallback for test compatibility
- Update upc_variants column to JSONB with variant for cross-db support
- Add GIN index migration for upc_variants

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:59:28 +00:00
cartsnitch-cto[bot] da96ec7dc4 Merge pull request #172 from cartsnitch/fix/cors-security-headers
CTO review: LGTM. CORS methods restricted to explicit list (no TRACE/CONNECT), headers whitelisted, nginx security headers added (X-Frame-Options, X-Content-Type-Options, Referrer-Policy, CSP). Clean diff, CI green.
2026-04-14 11:57:52 +00:00
CartSnitch Engineer Bot 37798251be fix: restrict CORS to explicit methods and add security headers
- Replace allow_methods=["*"] with explicit list: GET, POST, PUT, DELETE, PATCH, OPTIONS
- Replace allow_headers=["*"] with explicit list: Content-Type, Authorization, Accept, Origin, X-Requested-With
- Add X-Frame-Options, X-Content-Type-Options, Referrer-Policy, CSP nginx headers

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:49:02 +00:00
cartsnitch-cto[bot] ee97f64db6 Merge pull request #156 from cartsnitch/fix/hardcoded-secrets
fix: remove hardcoded default secrets from API config
2026-04-14 11:31:40 +00:00
CartSnitch Engineer Bot 538a5f4f4d fix: remove hardcoded default secrets from API config
Remove dangerous default values for jwt_secret_key, service_key, and
fernet_key. Add startup validation that raises RuntimeError if these
secrets are not set via environment variables or contain placeholder
values.

Add test fixture to provide explicit test values for these secrets,
ensuring existing tests continue to pass.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:11:23 +00:00
cartsnitch-cto[bot] 4485bf1d5e Merge pull request #148 from cartsnitch/betty/fix-alembic-create-all-commit
fix(api): commit after create_all in alembic env.py
2026-04-04 21:57:54 +00:00
cartsnitch-cto[bot] f7bf767da5 Merge pull request #147 from cartsnitch/betty/car-517-domain-tables-migration
CTO review: APPROVED. Migration creates all 9 domain tables in correct FK order with idempotent guards. env.py commit fix resolves SQLAlchemy 2.0 DDL persistence issue.
2026-04-04 21:36:48 +00:00
Barcode Betty 2f1833e90d fix(api): commit after create_all in alembic env.py
SQLAlchemy 2.0 removed implicit autocommit; without an explicit
connection.commit() DDL changes from create_all() are rolled back
when the connection closes, leaving fresh databases without tables.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-04 21:36:05 +00:00
cartsnitch-engineer[bot] b2725fd512 fix(api): create domain tables migration + fix create_all commit
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-04 21:22:24 +00:00
cartsnitch-cto[bot] 5532b43e38 Merge pull request #145 from cartsnitch/betty/fix-alembic-model-import
fix(api): import Base from models package to register all ORM tables
2026-04-04 21:20:11 +00:00
Barcode Betty 0be7ccd4b4 fix(api): import Base from models package to register all ORM tables
The models/__init__.py imports all ORM model classes (Store, Product,
Coupon, etc.) which registers their table definitions with Base.metadata.
Importing Base directly from models.base skips this registration, so
alembic's create_all() on fresh databases fails to create app tables.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-04 21:12:13 +00:00
cartsnitch-cto[bot] 6d37cecdba Merge pull request #143 from cartsnitch/betty/fix-session-cookie-parsing
fix(auth): parse compound Better-Auth cookie/bearer token
2026-04-04 20:39:09 +00:00
10 changed files with 341 additions and 31 deletions
+2 -1
View File
@@ -6,7 +6,7 @@ from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
from cartsnitch_api.models.base import Base # noqa: F401 — imports all models for autogenerate
from cartsnitch_api.models import Base # noqa: F401 — imports all models for autogenerate
config = context.config
if config.config_file_name is not None:
@@ -53,6 +53,7 @@ def run_migrations_online() -> None:
# checkfirst=True ensures this is a no-op on existing databases.
try:
Base.metadata.create_all(bind=connection, checkfirst=True)
connection.commit()
except Exception as exc:
import logging
logging.getLogger("alembic.env").warning(
@@ -0,0 +1,210 @@
"""Create domain tables (stores, purchases, coupons, etc.).
Revision ID: 008_create_domain_tables
Revises: 007_bootstrap_users_table
Create Date: 2026-04-04
"""
import sqlalchemy as sa
from sqlalchemy import text
from alembic import op
revision = "008_create_domain_tables"
down_revision = "007_bootstrap_users_table"
branch_labels = None
depends_on = None
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
# 1. stores
if not inspector.has_table("stores"):
op.create_table(
"stores",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("name", sa.String(100), nullable=False),
sa.Column("slug", sa.String(20), nullable=False, unique=True),
sa.Column("logo_url", sa.String(500), nullable=True),
sa.Column("website_url", sa.String(500), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
)
# 2. store_locations
if not inspector.has_table("store_locations"):
op.create_table(
"store_locations",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("address", sa.String(300), nullable=False),
sa.Column("city", sa.String(100), nullable=False),
sa.Column("state", sa.String(2), nullable=False),
sa.Column("zip", sa.String(10), nullable=False),
sa.Column("lat", sa.Float(), nullable=True),
sa.Column("lng", sa.Float(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
)
# 3. normalized_products
if not inspector.has_table("normalized_products"):
op.create_table(
"normalized_products",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("canonical_name", sa.String(300), nullable=False),
sa.Column("category", sa.String(50), nullable=True),
sa.Column("subcategory", sa.String(100), nullable=True),
sa.Column("brand", sa.String(200), nullable=True),
sa.Column("size", sa.String(50), nullable=True),
sa.Column("size_unit", sa.String(10), nullable=True),
sa.Column("upc_variants", sa.JSON(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
)
# 4. purchases
if not inspector.has_table("purchases"):
op.create_table(
"purchases",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("user_id", sa.Text(), sa.ForeignKey("users.id"), nullable=False),
sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("store_location_id", sa.Uuid(), sa.ForeignKey("store_locations.id"), nullable=True),
sa.Column("receipt_id", sa.String(200), nullable=False),
sa.Column("purchase_date", sa.Date(), nullable=False),
sa.Column("total", sa.Numeric(10, 2), nullable=False),
sa.Column("subtotal", sa.Numeric(10, 2), nullable=True),
sa.Column("tax", sa.Numeric(10, 2), nullable=True),
sa.Column("savings_total", sa.Numeric(10, 2), nullable=True),
sa.Column("source_url", sa.String(500), nullable=True),
sa.Column("raw_data", sa.JSON(), nullable=True),
sa.Column("ingested_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.UniqueConstraint("user_id", "store_id", "receipt_id", name="uq_purchase_receipt"),
sa.Index("ix_purchases_user_store", "user_id", "store_id"),
)
# 5. purchase_items
if not inspector.has_table("purchase_items"):
op.create_table(
"purchase_items",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("purchase_id", sa.Uuid(), sa.ForeignKey("purchases.id"), nullable=False),
sa.Column("product_name_raw", sa.String(300), nullable=False),
sa.Column("upc", sa.String(20), nullable=True),
sa.Column("quantity", sa.Numeric(10, 3), nullable=False),
sa.Column("unit_price", sa.Numeric(10, 2), nullable=False),
sa.Column("extended_price", sa.Numeric(10, 2), nullable=False),
sa.Column("regular_price", sa.Numeric(10, 2), nullable=True),
sa.Column("sale_price", sa.Numeric(10, 2), nullable=True),
sa.Column("coupon_discount", sa.Numeric(10, 2), nullable=True),
sa.Column("loyalty_discount", sa.Numeric(10, 2), nullable=True),
sa.Column("category_raw", sa.String(100), nullable=True),
sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
)
# 6. coupons
if not inspector.has_table("coupons"):
op.create_table(
"coupons",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=True),
sa.Column("title", sa.String(300), nullable=False),
sa.Column("description", sa.String(1000), nullable=True),
sa.Column("discount_type", sa.String(20), nullable=False),
sa.Column("discount_value", sa.Numeric(10, 2), nullable=True),
sa.Column("min_purchase", sa.Numeric(10, 2), nullable=True),
sa.Column("valid_from", sa.Date(), nullable=True),
sa.Column("valid_to", sa.Date(), nullable=True),
sa.Column("requires_clip", sa.Boolean(), server_default=text("false"), nullable=False),
sa.Column("coupon_code", sa.String(100), nullable=True),
sa.Column("source_url", sa.String(500), nullable=True),
sa.Column("scraped_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
)
# 7. price_history
if not inspector.has_table("price_history"):
op.create_table(
"price_history",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=False),
sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("observed_date", sa.Date(), nullable=False),
sa.Column("regular_price", sa.Numeric(10, 2), nullable=False),
sa.Column("sale_price", sa.Numeric(10, 2), nullable=True),
sa.Column("loyalty_price", sa.Numeric(10, 2), nullable=True),
sa.Column("coupon_price", sa.Numeric(10, 2), nullable=True),
sa.Column("source", sa.String(20), nullable=False),
sa.Column("purchase_item_id", sa.Uuid(), sa.ForeignKey("purchase_items.id"), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Index("ix_price_history_product_store_date", "normalized_product_id", "store_id", "observed_date"),
)
# 8. shrinkflation_events
if not inspector.has_table("shrinkflation_events"):
op.create_table(
"shrinkflation_events",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=False),
sa.Column("detected_date", sa.Date(), nullable=False),
sa.Column("old_size", sa.String(50), nullable=False),
sa.Column("new_size", sa.String(50), nullable=False),
sa.Column("old_unit", sa.String(10), nullable=True),
sa.Column("new_unit", sa.String(10), nullable=True),
sa.Column("price_at_old_size", sa.Numeric(10, 2), nullable=True),
sa.Column("price_at_new_size", sa.Numeric(10, 2), nullable=True),
sa.Column("confidence", sa.Numeric(3, 2), server_default=text("1.00"), nullable=False),
sa.Column("notes", sa.String(1000), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
)
# 9. user_store_accounts
if not inspector.has_table("user_store_accounts"):
op.create_table(
"user_store_accounts",
sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True),
sa.Column("user_id", sa.Text(), sa.ForeignKey("users.id"), nullable=False),
sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
sa.Column("session_data", sa.JSON(), nullable=True),
sa.Column("session_expires_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("last_sync_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("status", sa.String(20), server_default=text("'active'"), nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),
)
def downgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
if inspector.has_table("user_store_accounts"):
op.drop_table("user_store_accounts")
if inspector.has_table("shrinkflation_events"):
op.drop_table("shrinkflation_events")
if inspector.has_table("price_history"):
op.drop_table("price_history")
if inspector.has_table("coupons"):
op.drop_table("coupons")
if inspector.has_table("purchase_items"):
op.drop_table("purchase_items")
if inspector.has_table("purchases"):
op.drop_table("purchases")
if inspector.has_table("normalized_products"):
op.drop_table("normalized_products")
if inspector.has_table("store_locations"):
op.drop_table("store_locations")
if inspector.has_table("stores"):
op.drop_table("stores")
@@ -0,0 +1,38 @@
"""Add GIN index on upc_variants and alter column to JSONB.
Revision ID: 009_add_gin_index_upc_variants
Revises: 008_create_domain_tables
Create Date: 2026-04-14
"""
import sqlalchemy as sa
from alembic import op
revision = "009_add_gin_index_upc_variants"
down_revision = "008_create_domain_tables"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.dialects.postgresql.JSONB(),
postgresql_using="upc_variants::jsonb",
)
op.create_index(
"ix_normalized_products_upc_variants_gin",
"normalized_products",
["upc_variants"],
postgresql_using="gin",
)
def downgrade() -> None:
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.JSON(),
)
+22 -6
View File
@@ -13,14 +13,13 @@ class Settings(BaseSettings):
)
redis_url: str = "redis://localhost:6379/0"
jwt_secret_key: str = "change-me-in-production"
jwt_secret_key: str
jwt_algorithm: str = "HS256"
jwt_access_token_expire_minutes: int = 15
jwt_refresh_token_expire_days: int = 7
service_key: str = "change-me-in-production"
# Valid Fernet key for local dev — MUST be overridden in production
fernet_key: str = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
service_key: str
fernet_key: str
auth_service_url: str = "http://auth:3001"
@@ -35,9 +34,26 @@ class Settings(BaseSettings):
rate_limit_window_seconds: int = 60
rate_limit_enabled: bool = True
_PLACEHOLDER_VALUES = {"change-me-in-production"}
@model_validator(mode="after")
def validate_fernet_key(self):
"""Validate fernet_key is a valid 32-byte url-safe base64 key at startup."""
def validate_secrets(self):
if not self.jwt_secret_key or self.jwt_secret_key in self._PLACEHOLDER_VALUES:
raise ValueError(
"CARTSNITCH_JWT_SECRET_KEY must be set to a secure value. "
'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
)
if not self.service_key or self.service_key in self._PLACEHOLDER_VALUES:
raise ValueError(
"CARTSNITCH_SERVICE_KEY must be set to a secure value. "
'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
)
if not self.fernet_key or self.fernet_key in self._PLACEHOLDER_VALUES:
raise ValueError(
"CARTSNITCH_FERNET_KEY must be set to a valid Fernet key. "
"Generate one with: python -c "
"'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'"
)
try:
decoded = base64.urlsafe_b64decode(self.fernet_key.encode())
if len(decoded) != 32:
+2 -2
View File
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
)
+34 -7
View File
@@ -19,6 +19,25 @@ from cartsnitch_api.database import get_db
from cartsnitch_api.main import create_app
from cartsnitch_api.models import Base
TEST_JWT_SECRET = secrets.token_urlsafe(32)
TEST_SERVICE_KEY = secrets.token_urlsafe(32)
TEST_FERNET_KEY = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
@pytest.fixture(autouse=True)
def setup_test_settings():
original_jwt = cartsnitch_settings.jwt_secret_key
original_service = cartsnitch_settings.service_key
original_fernet = cartsnitch_settings.fernet_key
cartsnitch_settings.jwt_secret_key = TEST_JWT_SECRET
cartsnitch_settings.service_key = TEST_SERVICE_KEY
cartsnitch_settings.fernet_key = TEST_FERNET_KEY
yield
cartsnitch_settings.jwt_secret_key = original_jwt
cartsnitch_settings.service_key = original_service
cartsnitch_settings.fernet_key = original_fernet
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
@@ -60,7 +79,8 @@ async def db_engine():
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
# Create Better-Auth tables (not managed by SQLAlchemy models)
await conn.execute(text("""
await conn.execute(
text("""
CREATE TABLE IF NOT EXISTS sessions (
id TEXT PRIMARY KEY,
token TEXT NOT NULL UNIQUE,
@@ -71,8 +91,10 @@ async def db_engine():
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
)
"""))
await conn.execute(text("""
""")
)
await conn.execute(
text("""
CREATE TABLE IF NOT EXISTS accounts (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
@@ -88,8 +110,10 @@ async def db_engine():
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
)
"""))
await conn.execute(text("""
""")
)
await conn.execute(
text("""
CREATE TABLE IF NOT EXISTS verifications (
id TEXT PRIMARY KEY,
identifier TEXT NOT NULL,
@@ -98,7 +122,8 @@ async def db_engine():
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
)
"""))
""")
)
yield engine
@@ -133,7 +158,9 @@ async def client(db_engine):
app.dependency_overrides.clear()
async def _create_test_user_and_session(client: AsyncClient, db_engine, **user_overrides) -> tuple[dict, str]:
async def _create_test_user_and_session(
client: AsyncClient, db_engine, **user_overrides
) -> tuple[dict, str]:
"""Create a test user and a valid session directly in the DB.
Returns (user_dict, session_token). Better-Auth stores the raw token
@@ -3,6 +3,7 @@
from typing import TYPE_CHECKING
from sqlalchemy import JSON, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import ProductCategory, SizeUnit
@@ -26,7 +27,9 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
brand: Mapped[str | None] = mapped_column(String(200))
size: Mapped[str | None] = mapped_column(String(50))
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
upc_variants: Mapped[list[str] | None] = mapped_column(
JSON().with_variant(JSONB(), "postgresql"), default=list
)
# Relationships
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
+6
View File
@@ -9,6 +9,12 @@ server {
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
gzip_min_length 256;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
# Health endpoint for K8s probes
location /health {
access_log off;
+3 -3
View File
@@ -9805,9 +9805,9 @@
}
},
"node_modules/vite": {
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"version": "6.4.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
"devOptional": true,
"license": "MIT",
"dependencies": {
@@ -5,12 +5,14 @@ Matches products across retailers by:
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
"""
import json
import re
from dataclasses import dataclass
from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct
from sqlalchemy import select
from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session
@@ -96,17 +98,24 @@ def jaccard_similarity(a: str, b: str) -> float:
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
"""Find a normalized product by exact UPC match.
Loads products with upc_variants and checks membership in Python
for cross-database compatibility (works on both PostgreSQL and SQLite).
Uses PostgreSQL JSONB containment (@>) for production efficiency.
Falls back to LIKE on SQLite for test compatibility.
"""
# TODO: Use PostgreSQL JSON containment query (@>) for production.
# Current approach loads all products into memory — acceptable for tests
# and small datasets, but will not scale.
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
products = session.execute(stmt).scalars().all()
for product in products:
if product.upc_variants and upc in product.upc_variants:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
dialect_name = session.bind.dialect.name if session.bind else "default"
if dialect_name == "postgresql":
stmt = select(NormalizedProduct).where(
cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
func.cast(json.dumps([upc]), JSONB)
)
)
else:
stmt = select(NormalizedProduct).where(
NormalizedProduct.upc_variants.is_not(None),
cast(NormalizedProduct.upc_variants, String).contains(upc),
)
product = session.execute(stmt).scalars().first()
if product:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
return None