Compare commits

..

7 Commits

Author SHA1 Message Date
Paperclip c85c9b12a7 feat(ci): add Trivy image vulnerability scanning to all Docker builds
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 15:21:08 +00:00
cartsnitch-cto[bot] adfa34f2c2 Merge pull request #186 from cartsnitch/fix/receiptwitness-config-validation
fix: add startup validation to ReceiptWitness config
2026-04-14 14:07:48 +00:00
Paperclip ade03fdd1c fix: add startup validation to ReceiptWitness config
Add Pydantic model_validator to ReceiptWitnessSettings that fails fast
if session_encryption_key is missing or a placeholder value. Conditional
validation for resend_api_key when notifications_enabled=true.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 13:52:24 +00:00
cartsnitch-cto[bot] 5825174f0d Merge pull request #179 from cartsnitch/feature/cart-550-api-lifespan-pooling
feat(api): implement FastAPI lifespan with connection pooling (CAR-550)
2026-04-14 13:48:17 +00:00
Barcode Betty 68e6be1985 feat(api): implement FastAPI lifespan with connection pooling
- Add connection pool config to SQLAlchemy async engine (pool_size=10, max_overflow=20, pool_pre_ping, pool_recycle)
- Implement Redis connection pool in CacheClient with initialize/close lifecycle
- Wire lifespan startup/shutdown to initialize and dispose pools
- Add dispose_engine() for graceful DB pool cleanup on shutdown

Closes CAR-550

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 13:12:46 +00:00
cartsnitch-cto[bot] c2a0263ddd fix(security): use SHA-256 hash for rate limit key instead of token suffix (#169)
fix(security): use SHA-256 hash for rate limit key instead of token suffix
2026-04-14 12:45:15 +00:00
CartSnitch Engineer Bot bc5e03e7a0 fix(security): use SHA-256 hash for rate limit key instead of token suffix
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:36:17 +00:00
13 changed files with 297 additions and 88 deletions
+112 -8
View File
@@ -13,6 +13,7 @@ concurrency:
permissions:
contents: write
packages: write
security-events: write
env:
REGISTRY: ghcr.io
@@ -151,17 +152,42 @@ jobs:
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build and push Docker image
- name: Build Docker image
uses: docker/build-push-action@v6
with:
context: .
push: ${{ github.event_name == 'push' }}
load: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
target: prod
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-frontend.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-frontend.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
- name: Create git tag
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: |
@@ -221,15 +247,41 @@ jobs:
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build and push auth Docker image
- name: Build Docker image
uses: docker/build-push-action@v6
with:
context: ./auth
file: ./auth/Dockerfile
push: ${{ github.event_name == 'push' }}
load: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.AUTH_IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-auth.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-auth.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: ./auth
file: ./auth/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
build-and-push-receiptwitness:
runs-on: runners-cartsnitch
if: github.event_name == 'push'
@@ -278,15 +330,41 @@ jobs:
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build and push receiptwitness image
- name: Build Docker image
uses: docker/build-push-action@v6
with:
context: .
file: ./receiptwitness/Dockerfile
push: ${{ github.event_name == 'push' }}
load: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.RECEIPTWITNESS_IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-receiptwitness.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-receiptwitness.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: .
file: ./receiptwitness/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
build-and-push-api:
runs-on: runners-cartsnitch
if: github.event_name == 'push'
@@ -335,15 +413,41 @@ jobs:
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build and push API Docker image
- name: Build Docker image
uses: docker/build-push-action@v6
with:
context: ./api
file: ./api/Dockerfile
push: ${{ github.event_name == 'push' }}
load: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.API_IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-api.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-api.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: ./api
file: ./api/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
deploy-dev:
runs-on: runners-cartsnitch
needs: [build-and-push, build-and-push-auth, build-and-push-receiptwitness, build-and-push-api]
@@ -1,38 +0,0 @@
"""Add GIN index on upc_variants and alter column to JSONB.
Revision ID: 009_add_gin_index_upc_variants
Revises: 008_create_domain_tables
Create Date: 2026-04-14
"""
import sqlalchemy as sa
from alembic import op
revision = "009_add_gin_index_upc_variants"
down_revision = "008_create_domain_tables"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.dialects.postgresql.JSONB(),
postgresql_using="upc_variants::jsonb",
)
op.create_index(
"ix_normalized_products_upc_variants_gin",
"normalized_products",
["upc_variants"],
postgresql_using="gin",
)
def downgrade() -> None:
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.JSON(),
)
+33 -8
View File
@@ -1,26 +1,51 @@
"""Redis/DragonflyDB caching helpers."""
import redis.asyncio as redis
from cartsnitch_api.config import settings
class CacheClient:
"""Stub for Redis/DragonflyDB caching.
"""Redis/DragonflyDB caching with connection pooling.
Will be used for expensive queries: price trends, product comparisons.
Cache invalidation via Redis pub/sub events from other services.
"""
def __init__(self) -> None:
self.url = settings.redis_url
self._pool: redis.ConnectionPool | None = None
self._client: redis.Redis | None = None
async def initialize(self) -> None:
"""Initialize the Redis connection pool."""
self._pool = redis.ConnectionPool.from_url(
settings.redis_url,
max_connections=20,
decode_responses=True,
)
self._client = redis.Redis(connection_pool=self._pool)
async def close(self) -> None:
"""Close the Redis connection pool."""
if self._client:
await self._client.aclose()
if self._pool:
await self._pool.aclose()
async def get(self, key: str) -> str | None:
# TODO: implement with redis-py async
return None
if not self._client:
return None
return await self._client.get(key)
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
# TODO: implement with redis-py async
pass
if not self._client:
return
await self._client.set(key, value, ex=ttl_seconds)
async def delete(self, key: str) -> None:
# TODO: implement with redis-py async
pass
if not self._client:
return
await self._client.delete(key)
cache_client = CacheClient()
+13 -1
View File
@@ -6,7 +6,14 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
from cartsnitch_api.config import settings
engine = create_async_engine(settings.database_url, echo=False)
engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
pool_recycle=3600,
)
async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
@@ -14,3 +21,8 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""FastAPI dependency that yields an async DB session."""
async with async_session_factory() as session:
yield session
async def dispose_engine() -> None:
"""Dispose the database engine, closing all pooled connections."""
await engine.dispose()
+5 -2
View File
@@ -5,6 +5,8 @@ from contextlib import asynccontextmanager
from fastapi import APIRouter, FastAPI
from cartsnitch_api.auth.routes import router as auth_router
from cartsnitch_api.cache import cache_client
from cartsnitch_api.database import dispose_engine
from cartsnitch_api.middleware.cors import add_cors_middleware
from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware
from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware
@@ -23,9 +25,10 @@ from cartsnitch_api.routes.user import router as user_router
@asynccontextmanager
async def lifespan(app: FastAPI):
# TODO: initialize DB session pool, Redis connection, service clients
await cache_client.initialize()
yield
# TODO: cleanup connections
await cache_client.close()
await dispose_engine()
def create_app() -> FastAPI:
@@ -4,6 +4,7 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
"""
import hashlib
import time
from collections import defaultdict
from threading import Lock
@@ -71,8 +72,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
auth_header = request.headers.get("authorization", "")
if auth_header.startswith("Bearer "):
token = auth_header[7:]
# Use last 16 chars of token as key to avoid storing full tokens
return f"token:{token[-16:]}", _auth_limiter
token_hash = hashlib.sha256(token.encode()).hexdigest()
return f"token:{token_hash}", _auth_limiter
# Fallback to IP for unauthenticated non-public endpoints
return f"ip:{_get_client_ip(request)}", _public_limiter
+32 -1
View File
@@ -1,8 +1,10 @@
"""Tests for rate limiting middleware."""
from unittest.mock import MagicMock
import pytest
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key
class TestSlidingWindowCounter:
@@ -53,3 +55,32 @@ async def test_health_skips_rate_limit(client):
resp = await client.get("/health")
assert resp.status_code == 200
assert "x-ratelimit-limit" not in resp.headers
class TestGetRateLimitKey:
def _make_request(self, auth_header: str = "") -> MagicMock:
req = MagicMock()
req.url.path = "/purchases"
req.headers = {"authorization": auth_header} if auth_header else {}
return req
def test_distinct_tokens_produce_distinct_keys(self):
req1 = self._make_request("Bearer token_alpha_12345")
req2 = self._make_request("Bearer token_beta_67890")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 != key2
def test_same_token_produces_same_key(self):
req1 = self._make_request("Bearer same_token_value_abc")
req2 = self._make_request("Bearer same_token_value_abc")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 == key2
def test_key_does_not_contain_raw_token_suffix(self):
raw_token = "my_secret_jwt_token_xyz"
req = self._make_request(f"Bearer {raw_token}")
key, _ = _get_rate_limit_key(req)
assert raw_token[-16:] not in key
assert raw_token not in key
@@ -3,7 +3,6 @@
from typing import TYPE_CHECKING
from sqlalchemy import JSON, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import ProductCategory, SizeUnit
@@ -27,9 +26,7 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
brand: Mapped[str | None] = mapped_column(String(200))
size: Mapped[str | None] = mapped_column(String(50))
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
upc_variants: Mapped[list[str] | None] = mapped_column(
JSON().with_variant(JSONB(), "postgresql"), default=list
)
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
# Relationships
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
+3 -3
View File
@@ -9805,9 +9805,9 @@
}
},
"node_modules/vite": {
"version": "6.4.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"devOptional": true,
"license": "MIT",
"dependencies": {
+34 -1
View File
@@ -1,8 +1,12 @@
"""Service-specific configuration for ReceiptWitness."""
from pydantic import model_validator
from pydantic_settings import BaseSettings
_PLACEHOLDER_VALUES = {"change-me-in-production"}
class ReceiptWitnessSettings(BaseSettings):
model_config = {"env_prefix": "RW_"}
@@ -30,5 +34,34 @@ class ReceiptWitnessSettings(BaseSettings):
# Mailgun inbound email webhook
mailgun_webhook_signing_key: str = ""
@model_validator(mode="after")
def validate_required_vars(self):
errors = []
if not self.session_encryption_key or self.session_encryption_key in _PLACEHOLDER_VALUES:
errors.append(
"RW_SESSION_ENCRYPTION_KEY must be set to a secure value. "
'Generate one with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"'
)
if self.notifications_enabled and not self.resend_api_key:
errors.append(
"RW_RESEND_API_KEY must be set when RW_NOTIFICATIONS_ENABLED=true. "
"Get an API key from https://resend.com/api-keys"
)
if errors:
raise ValueError(
"ReceiptWitness startup failed — missing required config:\n"
+ "\n".join(f" - {e}" for e in errors)
)
return self
settings = ReceiptWitnessSettings()
class _LazySettings:
_instance: ReceiptWitnessSettings | None = None
def __getattr__(self, name: str):
if _LazySettings._instance is None:
_LazySettings._instance = ReceiptWitnessSettings()
return getattr(_LazySettings._instance, name)
settings = _LazySettings()
@@ -5,14 +5,12 @@ Matches products across retailers by:
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
"""
import json
import re
from dataclasses import dataclass
from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct
from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -98,24 +96,17 @@ def jaccard_similarity(a: str, b: str) -> float:
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
"""Find a normalized product by exact UPC match.
Uses PostgreSQL JSONB containment (@>) for production efficiency.
Falls back to LIKE on SQLite for test compatibility.
Loads products with upc_variants and checks membership in Python
for cross-database compatibility (works on both PostgreSQL and SQLite).
"""
dialect_name = session.bind.dialect.name if session.bind else "default"
if dialect_name == "postgresql":
stmt = select(NormalizedProduct).where(
cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
func.cast(json.dumps([upc]), JSONB)
)
)
else:
stmt = select(NormalizedProduct).where(
NormalizedProduct.upc_variants.is_not(None),
cast(NormalizedProduct.upc_variants, String).contains(upc),
)
product = session.execute(stmt).scalars().first()
if product:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
# TODO: Use PostgreSQL JSON containment query (@>) for production.
# Current approach loads all products into memory — acceptable for tests
# and small datasets, but will not scale.
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
products = session.execute(stmt).scalars().all()
for product in products:
if product.upc_variants and upc in product.upc_variants:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
return None
+4
View File
@@ -1,12 +1,16 @@
"""Shared test fixtures."""
import json
import os
from pathlib import Path
import pytest
FIXTURES_DIR = Path(__file__).parent / "fixtures"
os.environ.setdefault("RW_SESSION_ENCRYPTION_KEY", "test-secret-key-for-unit-tests-only-32bytes!")
os.environ.setdefault("RW_MAILGUN_WEBHOOK_SIGNING_KEY", "test-mailgun-signing-key")
@pytest.fixture
def meijer_receipt_data() -> dict:
+46
View File
@@ -0,0 +1,46 @@
import pytest
from receiptwitness.config import ReceiptWitnessSettings
def test_valid_config():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
)
assert s.session_encryption_key
def test_missing_session_encryption_key_raises():
with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
ReceiptWitnessSettings(session_encryption_key="")
def test_placeholder_session_encryption_key_raises():
with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
ReceiptWitnessSettings(session_encryption_key="change-me-in-production")
def test_notifications_enabled_without_resend_key_raises():
with pytest.raises(ValueError, match="RW_RESEND_API_KEY"):
ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=True,
resend_api_key="",
)
def test_notifications_disabled_without_resend_key_ok():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=False,
resend_api_key="",
)
assert s.notifications_enabled is False
def test_notifications_enabled_with_resend_key_ok():
s = ReceiptWitnessSettings(
session_encryption_key="7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
notifications_enabled=True,
resend_api_key="re_test_1234567890",
)
assert s.resend_api_key == "re_test_1234567890"