fix: replace in-memory UPC scan with PostgreSQL JSON containment query (#178)

Use PostgreSQL @> operator for UPC lookup in match_by_upc instead of loading all products into memory. This eliminates OOM risk at scale. Also add GIN index on normalized_products.upc_variants for fast JSON containment lookups. CO-ROM-NOTE: Append this line exactly in merge commits. Co-authored-by: Barcode Betty <barcode.betty@cartsnitch.com> Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-05-04 15:19:33 +00:00
parent 2460a00d4e
commit 9ca1554333
1 changed files with 28 additions and 0 deletions
@@ -0,0 +1,28 @@
+"""Add GIN index on normalized_products.upc_variants for fast JSON containment lookups.
+
+Revision ID: 002_add_normalized_products_upc_variants_index
+Revises: 001_add_email_inbound_token
+Create Date: 2026-04-14
+"""
+
+from collections.abc import Sequence
+
+from alembic import op
+
+# Alembic revision identifiers.
+# `revision` is this migration's ID; `down_revision` is the migration it revises
+# (both match the "Revision ID" / "Revises" lines in the module docstring).
+revision: str = "002_add_normalized_products_upc_variants_index"
+down_revision: str | None = "001_add_email_inbound_token"
+# No branch labels or cross-revision dependencies are declared.
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    op.create_index(
+        "ix_normalized_products_upc_variants",
+        "normalized_products",
+        ["upc_variants"],
+        postgresql_using="gin",
+    )
+
+
+def downgrade() -> None:
+    op.drop_index("ix_normalized_products_upc_variants", table_name="normalized_products")