Compare commits

..

2 Commits

Author SHA1 Message Date
Paperclip 1aff898545 fix: update vite to 6.4.2 to patch audit vulnerabilities
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 14:31:02 +00:00
CartSnitch Engineer Bot 24f0dd0e67 fix: replace N+1 UPC query with SQL containment in normalization
- Add PostgreSQL JSONB containment (@>) query for match_by_upc
- Add SQLite LIKE fallback for test compatibility
- Update upc_variants column to JSONB with variant for cross-db support
- Add GIN index migration for upc_variants

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:59:28 +00:00
12 changed files with 83 additions and 194 deletions
@@ -0,0 +1,38 @@
"""Add GIN index on upc_variants and alter column to JSONB.
Revision ID: 009_add_gin_index_upc_variants
Revises: 008_create_domain_tables
Create Date: 2026-04-14
"""
import sqlalchemy as sa
from alembic import op
revision = "009_add_gin_index_upc_variants"
down_revision = "008_create_domain_tables"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.dialects.postgresql.JSONB(),
postgresql_using="upc_variants::jsonb",
)
op.create_index(
"ix_normalized_products_upc_variants_gin",
"normalized_products",
["upc_variants"],
postgresql_using="gin",
)
def downgrade() -> None:
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.JSON(),
)
+8 -33
View File
@@ -1,51 +1,26 @@
"""Redis/DragonflyDB caching helpers.""" """Redis/DragonflyDB caching helpers."""
import redis.asyncio as redis
from cartsnitch_api.config import settings from cartsnitch_api.config import settings
class CacheClient: class CacheClient:
"""Redis/DragonflyDB caching with connection pooling. """Stub for Redis/DragonflyDB caching.
Will be used for expensive queries: price trends, product comparisons. Will be used for expensive queries: price trends, product comparisons.
Cache invalidation via Redis pub/sub events from other services. Cache invalidation via Redis pub/sub events from other services.
""" """
def __init__(self) -> None: def __init__(self) -> None:
self._pool: redis.ConnectionPool | None = None self.url = settings.redis_url
self._client: redis.Redis | None = None
async def initialize(self) -> None:
"""Initialize the Redis connection pool."""
self._pool = redis.ConnectionPool.from_url(
settings.redis_url,
max_connections=20,
decode_responses=True,
)
self._client = redis.Redis(connection_pool=self._pool)
async def close(self) -> None:
"""Close the Redis connection pool."""
if self._client:
await self._client.aclose()
if self._pool:
await self._pool.aclose()
async def get(self, key: str) -> str | None: async def get(self, key: str) -> str | None:
if not self._client: # TODO: implement with redis-py async
return None return None
return await self._client.get(key)
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None: async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
if not self._client: # TODO: implement with redis-py async
return pass
await self._client.set(key, value, ex=ttl_seconds)
async def delete(self, key: str) -> None: async def delete(self, key: str) -> None:
if not self._client: # TODO: implement with redis-py async
return pass
await self._client.delete(key)
cache_client = CacheClient()
+1 -13
View File
@@ -6,14 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
from cartsnitch_api.config import settings from cartsnitch_api.config import settings
engine = create_async_engine( engine = create_async_engine(settings.database_url, echo=False)
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
pool_recycle=3600,
)
async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
@@ -21,8 +14,3 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""FastAPI dependency that yields an async DB session.""" """FastAPI dependency that yields an async DB session."""
async with async_session_factory() as session: async with async_session_factory() as session:
yield session yield session
async def dispose_engine() -> None:
"""Dispose the database engine, closing all pooled connections."""
await engine.dispose()
+2 -5
View File
@@ -5,8 +5,6 @@ from contextlib import asynccontextmanager
from fastapi import APIRouter, FastAPI from fastapi import APIRouter, FastAPI
from cartsnitch_api.auth.routes import router as auth_router from cartsnitch_api.auth.routes import router as auth_router
from cartsnitch_api.cache import cache_client
from cartsnitch_api.database import dispose_engine
from cartsnitch_api.middleware.cors import add_cors_middleware from cartsnitch_api.middleware.cors import add_cors_middleware
from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware
from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware
@@ -25,10 +23,9 @@ from cartsnitch_api.routes.user import router as user_router
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
await cache_client.initialize() # TODO: initialize DB session pool, Redis connection, service clients
yield yield
await cache_client.close() # TODO: cleanup connections
await dispose_engine()
def create_app() -> FastAPI: def create_app() -> FastAPI:
@@ -4,7 +4,6 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints. Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
""" """
import hashlib
import time import time
from collections import defaultdict from collections import defaultdict
from threading import Lock from threading import Lock
@@ -72,8 +71,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
auth_header = request.headers.get("authorization", "") auth_header = request.headers.get("authorization", "")
if auth_header.startswith("Bearer "): if auth_header.startswith("Bearer "):
token = auth_header[7:] token = auth_header[7:]
token_hash = hashlib.sha256(token.encode()).hexdigest() # Use last 16 chars of token as key to avoid storing full tokens
return f"token:{token_hash}", _auth_limiter return f"token:{token[-16:]}", _auth_limiter
# Fallback to IP for unauthenticated non-public endpoints # Fallback to IP for unauthenticated non-public endpoints
return f"ip:{_get_client_ip(request)}", _public_limiter return f"ip:{_get_client_ip(request)}", _public_limiter
+1 -32
View File
@@ -1,10 +1,8 @@
"""Tests for rate limiting middleware.""" """Tests for rate limiting middleware."""
from unittest.mock import MagicMock
import pytest import pytest
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter
class TestSlidingWindowCounter: class TestSlidingWindowCounter:
@@ -55,32 +53,3 @@ async def test_health_skips_rate_limit(client):
resp = await client.get("/health") resp = await client.get("/health")
assert resp.status_code == 200 assert resp.status_code == 200
assert "x-ratelimit-limit" not in resp.headers assert "x-ratelimit-limit" not in resp.headers
class TestGetRateLimitKey:
def _make_request(self, auth_header: str = "") -> MagicMock:
req = MagicMock()
req.url.path = "/purchases"
req.headers = {"authorization": auth_header} if auth_header else {}
return req
def test_distinct_tokens_produce_distinct_keys(self):
req1 = self._make_request("Bearer token_alpha_12345")
req2 = self._make_request("Bearer token_beta_67890")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 != key2
def test_same_token_produces_same_key(self):
req1 = self._make_request("Bearer same_token_value_abc")
req2 = self._make_request("Bearer same_token_value_abc")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 == key2
def test_key_does_not_contain_raw_token_suffix(self):
raw_token = "my_secret_jwt_token_xyz"
req = self._make_request(f"Bearer {raw_token}")
key, _ = _get_rate_limit_key(req)
assert raw_token[-16:] not in key
assert raw_token not in key
+6 -12
View File
@@ -4,23 +4,17 @@ import pg from "pg";
const { Pool } = pg; const { Pool } = pg;
const pool = new Pool({
connectionString:
process.env.DATABASE_URL ??
"postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
});
const secret = process.env.BETTER_AUTH_SECRET; const secret = process.env.BETTER_AUTH_SECRET;
if (!secret) { if (!secret) {
throw new Error("BETTER_AUTH_SECRET environment variable is required"); throw new Error("BETTER_AUTH_SECRET environment variable is required");
} }
const databaseUrl = process.env.DATABASE_URL;
if (!databaseUrl) {
console.warn(
"WARNING: DATABASE_URL is not set — using default localhost connection. " +
"Set DATABASE_URL for production deployments."
);
}
const pool = new Pool({
connectionString: databaseUrl ?? "postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
});
export const auth = betterAuth({ export const auth = betterAuth({
database: pool, database: pool,
basePath: "/auth", basePath: "/auth",
@@ -3,6 +3,7 @@
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from sqlalchemy import JSON, String from sqlalchemy import JSON, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import ProductCategory, SizeUnit from cartsnitch_common.constants import ProductCategory, SizeUnit
@@ -26,7 +27,9 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
brand: Mapped[str | None] = mapped_column(String(200)) brand: Mapped[str | None] = mapped_column(String(200))
size: Mapped[str | None] = mapped_column(String(50)) size: Mapped[str | None] = mapped_column(String(50))
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10)) size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list) upc_variants: Mapped[list[str] | None] = mapped_column(
JSON().with_variant(JSONB(), "postgresql"), default=list
)
# Relationships # Relationships
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product") purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
+1 -34
View File
@@ -1,12 +1,8 @@
"""Service-specific configuration for ReceiptWitness.""" """Service-specific configuration for ReceiptWitness."""
from pydantic import model_validator
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
_PLACEHOLDER_VALUES = {"change-me-in-production"}
class ReceiptWitnessSettings(BaseSettings): class ReceiptWitnessSettings(BaseSettings):
model_config = {"env_prefix": "RW_"} model_config = {"env_prefix": "RW_"}
@@ -34,34 +30,5 @@ class ReceiptWitnessSettings(BaseSettings):
# Mailgun inbound email webhook # Mailgun inbound email webhook
mailgun_webhook_signing_key: str = "" mailgun_webhook_signing_key: str = ""
@model_validator(mode="after")
def validate_required_vars(self):
errors = []
if not self.session_encryption_key or self.session_encryption_key in _PLACEHOLDER_VALUES:
errors.append(
"RW_SESSION_ENCRYPTION_KEY must be set to a secure value. "
'Generate one with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"'
)
if self.notifications_enabled and not self.resend_api_key:
errors.append(
"RW_RESEND_API_KEY must be set when RW_NOTIFICATIONS_ENABLED=true. "
"Get an API key from https://resend.com/api-keys"
)
if errors:
raise ValueError(
"ReceiptWitness startup failed — missing required config:\n"
+ "\n".join(f" - {e}" for e in errors)
)
return self
settings = ReceiptWitnessSettings()
class _LazySettings:
_instance: ReceiptWitnessSettings | None = None
def __getattr__(self, name: str):
if _LazySettings._instance is None:
_LazySettings._instance = ReceiptWitnessSettings()
return getattr(_LazySettings._instance, name)
settings = _LazySettings()
@@ -5,12 +5,14 @@ Matches products across retailers by:
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence) 2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
""" """
import json
import re import re
from dataclasses import dataclass from dataclasses import dataclass
from enum import StrEnum from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct from cartsnitch_common.models.product import NormalizedProduct
from sqlalchemy import select from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@@ -96,17 +98,24 @@ def jaccard_similarity(a: str, b: str) -> float:
def match_by_upc(session: Session, upc: str) -> MatchResult | None: def match_by_upc(session: Session, upc: str) -> MatchResult | None:
"""Find a normalized product by exact UPC match. """Find a normalized product by exact UPC match.
Loads products with upc_variants and checks membership in Python Uses PostgreSQL JSONB containment (@>) for production efficiency.
for cross-database compatibility (works on both PostgreSQL and SQLite). Falls back to LIKE on SQLite for test compatibility.
""" """
# TODO: Use PostgreSQL JSON containment query (@>) for production. dialect_name = session.bind.dialect.name if session.bind else "default"
# Current approach loads all products into memory — acceptable for tests if dialect_name == "postgresql":
# and small datasets, but will not scale. stmt = select(NormalizedProduct).where(
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None)) cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
products = session.execute(stmt).scalars().all() func.cast(json.dumps([upc]), JSONB)
for product in products: )
if product.upc_variants and upc in product.upc_variants: )
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC) else:
stmt = select(NormalizedProduct).where(
NormalizedProduct.upc_variants.is_not(None),
cast(NormalizedProduct.upc_variants, String).contains(upc),
)
product = session.execute(stmt).scalars().first()
if product:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
return None return None
-4
View File
@@ -1,16 +1,12 @@
"""Shared test fixtures.""" """Shared test fixtures."""
import json import json
import os
from pathlib import Path from pathlib import Path
import pytest import pytest
FIXTURES_DIR = Path(__file__).parent / "fixtures" FIXTURES_DIR = Path(__file__).parent / "fixtures"
os.environ.setdefault("RW_SESSION_ENCRYPTION_KEY", "test-secret-key-for-unit-tests-only-32bytes!")
os.environ.setdefault("RW_MAILGUN_WEBHOOK_SIGNING_KEY", "test-mailgun-signing-key")
@pytest.fixture @pytest.fixture
def meijer_receipt_data() -> dict: def meijer_receipt_data() -> dict:
-46
View File
@@ -1,46 +0,0 @@
import pytest
from receiptwitness.config import ReceiptWitnessSettings
def test_valid_config():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
)
assert s.session_encryption_key
def test_missing_session_encryption_key_raises():
with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
ReceiptWitnessSettings(session_encryption_key="")
def test_placeholder_session_encryption_key_raises():
with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
ReceiptWitnessSettings(session_encryption_key="change-me-in-production")
def test_notifications_enabled_without_resend_key_raises():
with pytest.raises(ValueError, match="RW_RESEND_API_KEY"):
ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=True,
resend_api_key="",
)
def test_notifications_disabled_without_resend_key_ok():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=False,
resend_api_key="",
)
assert s.notifications_enabled is False
def test_notifications_enabled_with_resend_key_ok():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=True,
resend_api_key="re_test_1234567890",
)
assert s.resend_api_key == "re_test_1234567890"