Compare commits

..

8 Commits

Author SHA1 Message Date
Barcode Betty a53daddb9a fix: update vite to resolve high-severity audit vulnerability 2026-04-14 16:09:48 +00:00
Paperclip 3351d74058 fix: add startup validation to auth service config
- Add DATABASE_URL validation after BETTER_AUTH_SECRET check
- Warn clearly when DATABASE_URL is not set (uses localhost default)
- Move pool declaration after validation blocks

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 16:03:37 +00:00
cartsnitch-cto[bot] adfa34f2c2 Merge pull request #186 from cartsnitch/fix/receiptwitness-config-validation
fix: add startup validation to ReceiptWitness config
2026-04-14 14:07:48 +00:00
Paperclip ade03fdd1c fix: add startup validation to ReceiptWitness config
Add Pydantic model_validator to ReceiptWitnessSettings that fails fast
if session_encryption_key is missing or a placeholder value. Conditional
validation for resend_api_key when notifications_enabled=true.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 13:52:24 +00:00
cartsnitch-cto[bot] 5825174f0d Merge pull request #179 from cartsnitch/feature/cart-550-api-lifespan-pooling
feat(api): implement FastAPI lifespan with connection pooling (CAR-550)
2026-04-14 13:48:17 +00:00
Barcode Betty 68e6be1985 feat(api): implement FastAPI lifespan with connection pooling
- Add connection pool config to SQLAlchemy async engine (pool_size=10, max_overflow=20, pool_pre_ping, pool_recycle)
- Implement Redis connection pool in CacheClient with initialize/close lifecycle
- Wire lifespan startup/shutdown to initialize and dispose pools
- Add dispose_engine() for graceful DB pool cleanup on shutdown

Closes CAR-550

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 13:12:46 +00:00
cartsnitch-cto[bot] c2a0263ddd fix(security): use SHA-256 hash for rate limit key instead of token suffix (#169)
fix(security): use SHA-256 hash for rate limit key instead of token suffix
2026-04-14 12:45:15 +00:00
CartSnitch Engineer Bot bc5e03e7a0 fix(security): use SHA-256 hash for rate limit key instead of token suffix
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:36:17 +00:00
12 changed files with 194 additions and 83 deletions
@@ -1,38 +0,0 @@
"""Add GIN index on upc_variants and alter column to JSONB.
Revision ID: 009_add_gin_index_upc_variants
Revises: 008_create_domain_tables
Create Date: 2026-04-14
"""
import sqlalchemy as sa
from alembic import op
revision = "009_add_gin_index_upc_variants"
down_revision = "008_create_domain_tables"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.dialects.postgresql.JSONB(),
postgresql_using="upc_variants::jsonb",
)
op.create_index(
"ix_normalized_products_upc_variants_gin",
"normalized_products",
["upc_variants"],
postgresql_using="gin",
)
def downgrade() -> None:
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.JSON(),
)
+33 -8
View File
@@ -1,26 +1,51 @@
"""Redis/DragonflyDB caching helpers.""" """Redis/DragonflyDB caching helpers."""
import redis.asyncio as redis
from cartsnitch_api.config import settings from cartsnitch_api.config import settings
class CacheClient: class CacheClient:
"""Stub for Redis/DragonflyDB caching. """Redis/DragonflyDB caching with connection pooling.
Will be used for expensive queries: price trends, product comparisons. Will be used for expensive queries: price trends, product comparisons.
Cache invalidation via Redis pub/sub events from other services. Cache invalidation via Redis pub/sub events from other services.
""" """
def __init__(self) -> None: def __init__(self) -> None:
self.url = settings.redis_url self._pool: redis.ConnectionPool | None = None
self._client: redis.Redis | None = None
async def initialize(self) -> None:
"""Initialize the Redis connection pool."""
self._pool = redis.ConnectionPool.from_url(
settings.redis_url,
max_connections=20,
decode_responses=True,
)
self._client = redis.Redis(connection_pool=self._pool)
async def close(self) -> None:
"""Close the Redis connection pool."""
if self._client:
await self._client.aclose()
if self._pool:
await self._pool.aclose()
async def get(self, key: str) -> str | None: async def get(self, key: str) -> str | None:
# TODO: implement with redis-py async if not self._client:
return None return None
return await self._client.get(key)
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None: async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
# TODO: implement with redis-py async if not self._client:
pass return
await self._client.set(key, value, ex=ttl_seconds)
async def delete(self, key: str) -> None: async def delete(self, key: str) -> None:
# TODO: implement with redis-py async if not self._client:
pass return
await self._client.delete(key)
cache_client = CacheClient()
+13 -1
View File
@@ -6,7 +6,14 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
from cartsnitch_api.config import settings from cartsnitch_api.config import settings
engine = create_async_engine(settings.database_url, echo=False) engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
pool_recycle=3600,
)
async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
@@ -14,3 +21,8 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""FastAPI dependency that yields an async DB session.""" """FastAPI dependency that yields an async DB session."""
async with async_session_factory() as session: async with async_session_factory() as session:
yield session yield session
async def dispose_engine() -> None:
"""Dispose the database engine, closing all pooled connections."""
await engine.dispose()
+5 -2
View File
@@ -5,6 +5,8 @@ from contextlib import asynccontextmanager
from fastapi import APIRouter, FastAPI from fastapi import APIRouter, FastAPI
from cartsnitch_api.auth.routes import router as auth_router from cartsnitch_api.auth.routes import router as auth_router
from cartsnitch_api.cache import cache_client
from cartsnitch_api.database import dispose_engine
from cartsnitch_api.middleware.cors import add_cors_middleware from cartsnitch_api.middleware.cors import add_cors_middleware
from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware
from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware
@@ -23,9 +25,10 @@ from cartsnitch_api.routes.user import router as user_router
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
# TODO: initialize DB session pool, Redis connection, service clients await cache_client.initialize()
yield yield
# TODO: cleanup connections await cache_client.close()
await dispose_engine()
def create_app() -> FastAPI: def create_app() -> FastAPI:
@@ -4,6 +4,7 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints. Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
""" """
import hashlib
import time import time
from collections import defaultdict from collections import defaultdict
from threading import Lock from threading import Lock
@@ -71,8 +72,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
auth_header = request.headers.get("authorization", "") auth_header = request.headers.get("authorization", "")
if auth_header.startswith("Bearer "): if auth_header.startswith("Bearer "):
token = auth_header[7:] token = auth_header[7:]
# Use last 16 chars of token as key to avoid storing full tokens token_hash = hashlib.sha256(token.encode()).hexdigest()
return f"token:{token[-16:]}", _auth_limiter return f"token:{token_hash}", _auth_limiter
# Fallback to IP for unauthenticated non-public endpoints # Fallback to IP for unauthenticated non-public endpoints
return f"ip:{_get_client_ip(request)}", _public_limiter return f"ip:{_get_client_ip(request)}", _public_limiter
+32 -1
View File
@@ -1,8 +1,10 @@
"""Tests for rate limiting middleware.""" """Tests for rate limiting middleware."""
from unittest.mock import MagicMock
import pytest import pytest
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key
class TestSlidingWindowCounter: class TestSlidingWindowCounter:
@@ -53,3 +55,32 @@ async def test_health_skips_rate_limit(client):
resp = await client.get("/health") resp = await client.get("/health")
assert resp.status_code == 200 assert resp.status_code == 200
assert "x-ratelimit-limit" not in resp.headers assert "x-ratelimit-limit" not in resp.headers
class TestGetRateLimitKey:
def _make_request(self, auth_header: str = "") -> MagicMock:
req = MagicMock()
req.url.path = "/purchases"
req.headers = {"authorization": auth_header} if auth_header else {}
return req
def test_distinct_tokens_produce_distinct_keys(self):
req1 = self._make_request("Bearer token_alpha_12345")
req2 = self._make_request("Bearer token_beta_67890")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 != key2
def test_same_token_produces_same_key(self):
req1 = self._make_request("Bearer same_token_value_abc")
req2 = self._make_request("Bearer same_token_value_abc")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 == key2
def test_key_does_not_contain_raw_token_suffix(self):
raw_token = "my_secret_jwt_token_xyz"
req = self._make_request(f"Bearer {raw_token}")
key, _ = _get_rate_limit_key(req)
assert raw_token[-16:] not in key
assert raw_token not in key
+12 -6
View File
@@ -4,17 +4,23 @@ import pg from "pg";
const { Pool } = pg; const { Pool } = pg;
const pool = new Pool({
connectionString:
process.env.DATABASE_URL ??
"postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
});
const secret = process.env.BETTER_AUTH_SECRET; const secret = process.env.BETTER_AUTH_SECRET;
if (!secret) { if (!secret) {
throw new Error("BETTER_AUTH_SECRET environment variable is required"); throw new Error("BETTER_AUTH_SECRET environment variable is required");
} }
const databaseUrl = process.env.DATABASE_URL;
if (!databaseUrl) {
console.warn(
"WARNING: DATABASE_URL is not set — using default localhost connection. " +
"Set DATABASE_URL for production deployments."
);
}
const pool = new Pool({
connectionString: databaseUrl ?? "postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
});
export const auth = betterAuth({ export const auth = betterAuth({
database: pool, database: pool,
basePath: "/auth", basePath: "/auth",
@@ -3,7 +3,6 @@
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from sqlalchemy import JSON, String from sqlalchemy import JSON, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import ProductCategory, SizeUnit from cartsnitch_common.constants import ProductCategory, SizeUnit
@@ -27,9 +26,7 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
brand: Mapped[str | None] = mapped_column(String(200)) brand: Mapped[str | None] = mapped_column(String(200))
size: Mapped[str | None] = mapped_column(String(50)) size: Mapped[str | None] = mapped_column(String(50))
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10)) size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
upc_variants: Mapped[list[str] | None] = mapped_column( upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
JSON().with_variant(JSONB(), "postgresql"), default=list
)
# Relationships # Relationships
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product") purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
+34 -1
View File
@@ -1,8 +1,12 @@
"""Service-specific configuration for ReceiptWitness.""" """Service-specific configuration for ReceiptWitness."""
from pydantic import model_validator
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings
_PLACEHOLDER_VALUES = {"change-me-in-production"}
class ReceiptWitnessSettings(BaseSettings): class ReceiptWitnessSettings(BaseSettings):
model_config = {"env_prefix": "RW_"} model_config = {"env_prefix": "RW_"}
@@ -30,5 +34,34 @@ class ReceiptWitnessSettings(BaseSettings):
# Mailgun inbound email webhook # Mailgun inbound email webhook
mailgun_webhook_signing_key: str = "" mailgun_webhook_signing_key: str = ""
@model_validator(mode="after")
def validate_required_vars(self):
errors = []
if not self.session_encryption_key or self.session_encryption_key in _PLACEHOLDER_VALUES:
errors.append(
"RW_SESSION_ENCRYPTION_KEY must be set to a secure value. "
'Generate one with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"'
)
if self.notifications_enabled and not self.resend_api_key:
errors.append(
"RW_RESEND_API_KEY must be set when RW_NOTIFICATIONS_ENABLED=true. "
"Get an API key from https://resend.com/api-keys"
)
if errors:
raise ValueError(
"ReceiptWitness startup failed — missing required config:\n"
+ "\n".join(f" - {e}" for e in errors)
)
return self
settings = ReceiptWitnessSettings()
class _LazySettings:
_instance: ReceiptWitnessSettings | None = None
def __getattr__(self, name: str):
if _LazySettings._instance is None:
_LazySettings._instance = ReceiptWitnessSettings()
return getattr(_LazySettings._instance, name)
settings = _LazySettings()
@@ -5,14 +5,12 @@ Matches products across retailers by:
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence) 2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
""" """
import json
import re import re
from dataclasses import dataclass from dataclasses import dataclass
from enum import StrEnum from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct from cartsnitch_common.models.product import NormalizedProduct
from sqlalchemy import cast, func, select, String from sqlalchemy import select
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
@@ -98,24 +96,17 @@ def jaccard_similarity(a: str, b: str) -> float:
def match_by_upc(session: Session, upc: str) -> MatchResult | None: def match_by_upc(session: Session, upc: str) -> MatchResult | None:
"""Find a normalized product by exact UPC match. """Find a normalized product by exact UPC match.
Uses PostgreSQL JSONB containment (@>) for production efficiency. Loads products with upc_variants and checks membership in Python
Falls back to LIKE on SQLite for test compatibility. for cross-database compatibility (works on both PostgreSQL and SQLite).
""" """
dialect_name = session.bind.dialect.name if session.bind else "default" # TODO: Use PostgreSQL JSON containment query (@>) for production.
if dialect_name == "postgresql": # Current approach loads all products into memory — acceptable for tests
stmt = select(NormalizedProduct).where( # and small datasets, but will not scale.
cast(NormalizedProduct.upc_variants, JSONB).op("@>")( stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
func.cast(json.dumps([upc]), JSONB) products = session.execute(stmt).scalars().all()
) for product in products:
) if product.upc_variants and upc in product.upc_variants:
else: return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
stmt = select(NormalizedProduct).where(
NormalizedProduct.upc_variants.is_not(None),
cast(NormalizedProduct.upc_variants, String).contains(upc),
)
product = session.execute(stmt).scalars().first()
if product:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
return None return None
+4
View File
@@ -1,12 +1,16 @@
"""Shared test fixtures.""" """Shared test fixtures."""
import json import json
import os
from pathlib import Path from pathlib import Path
import pytest import pytest
FIXTURES_DIR = Path(__file__).parent / "fixtures" FIXTURES_DIR = Path(__file__).parent / "fixtures"
os.environ.setdefault("RW_SESSION_ENCRYPTION_KEY", "test-secret-key-for-unit-tests-only-32bytes!")
os.environ.setdefault("RW_MAILGUN_WEBHOOK_SIGNING_KEY", "test-mailgun-signing-key")
@pytest.fixture @pytest.fixture
def meijer_receipt_data() -> dict: def meijer_receipt_data() -> dict:
+46
View File
@@ -0,0 +1,46 @@
import pytest
from receiptwitness.config import ReceiptWitnessSettings
def test_valid_config():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
)
assert s.session_encryption_key
def test_missing_session_encryption_key_raises():
with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
ReceiptWitnessSettings(session_encryption_key="")
def test_placeholder_session_encryption_key_raises():
with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
ReceiptWitnessSettings(session_encryption_key="change-me-in-production")
def test_notifications_enabled_without_resend_key_raises():
with pytest.raises(ValueError, match="RW_RESEND_API_KEY"):
ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=True,
resend_api_key="",
)
def test_notifications_disabled_without_resend_key_ok():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=False,
resend_api_key="",
)
assert s.notifications_enabled is False
def test_notifications_enabled_with_resend_key_ok():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=True,
resend_api_key="re_test_1234567890",
)
assert s.resend_api_key == "re_test_1234567890"