fix: replace in-memory UPC scan with PostgreSQL JSON containment query (#178)

Use the PostgreSQL @> operator for the UPC lookup in match_by_upc instead of
loading all products into memory. This eliminates the OOM risk at scale.

Also add a GIN index on normalized_products.upc_variants for fast
JSON containment lookups.

CO-ROM-NOTE: Append this line exactly in merge commits.

Co-authored-by: Barcode Betty <barcode.betty@cartsnitch.com>
Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
cartsnitch-engineer[bot]
2026-05-04 15:19:33 +00:00
committed by GitHub
parent 2460a00d4e
commit 9ca1554333
@@ -0,0 +1,28 @@
"""Add GIN index on normalized_products.upc_variants for fast JSON containment lookups.
Revision ID: 002_add_normalized_products_upc_variants_index
Revises: 001_add_email_inbound_token
Create Date: 2026-04-14
"""
from collections.abc import Sequence
from alembic import op
# Alembic revision identifiers: `revision` names this migration and
# `down_revision` names the migration it revises (see the module docstring).
revision: str = "002_add_normalized_products_upc_variants_index"
down_revision: str | None = "001_add_email_inbound_token"
# No branch labels or cross-revision dependencies are declared here.
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Create the GIN index on ``normalized_products.upc_variants``.

    The index is named ``ix_normalized_products_upc_variants`` and is built
    with the PostgreSQL ``gin`` access method.

    NOTE(review): PostgreSQL's built-in GIN operator classes cover ``jsonb``
    but not plain ``json`` -- confirm the column type before running this
    against a real database.
    """
    index_name = "ix_normalized_products_upc_variants"
    indexed_columns = ["upc_variants"]
    # ``postgresql_using`` is the dialect-specific option selecting the
    # index access method.
    op.create_index(index_name, "normalized_products", indexed_columns, postgresql_using="gin")
def downgrade() -> None:
    """Drop ``ix_normalized_products_upc_variants`` from ``normalized_products``."""
    index_name = "ix_normalized_products_upc_variants"
    op.drop_index(
        index_name,
        table_name="normalized_products",
    )