Compare commits

..

1 Commits

Author SHA1 Message Date
CartSnitch Engineer Bot cfea2586cb feat(api): add input validation on public endpoints
- Add days query param to GET /public/trends/{product_id} (ge=1, le=365)
- Add category query param to GET /public/store-comparison
- Add category and period query params to GET /public/inflation
- Add boundary and malicious input test cases

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:45:53 +00:00
15 changed files with 176 additions and 221 deletions
+8 -33
View File
@@ -1,51 +1,26 @@
"""Redis/DragonflyDB caching helpers."""
import redis.asyncio as redis
from cartsnitch_api.config import settings
class CacheClient:
"""Redis/DragonflyDB caching with connection pooling.
"""Stub for Redis/DragonflyDB caching.
Will be used for expensive queries: price trends, product comparisons.
Cache invalidation via Redis pub/sub events from other services.
"""
def __init__(self) -> None:
self._pool: redis.ConnectionPool | None = None
self._client: redis.Redis | None = None
async def initialize(self) -> None:
"""Initialize the Redis connection pool."""
self._pool = redis.ConnectionPool.from_url(
settings.redis_url,
max_connections=20,
decode_responses=True,
)
self._client = redis.Redis(connection_pool=self._pool)
async def close(self) -> None:
"""Close the Redis connection pool."""
if self._client:
await self._client.aclose()
if self._pool:
await self._pool.aclose()
self.url = settings.redis_url
async def get(self, key: str) -> str | None:
if not self._client:
return None
return await self._client.get(key)
# TODO: implement with redis-py async
return None
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
if not self._client:
return
await self._client.set(key, value, ex=ttl_seconds)
# TODO: implement with redis-py async
pass
async def delete(self, key: str) -> None:
if not self._client:
return
await self._client.delete(key)
cache_client = CacheClient()
# TODO: implement with redis-py async
pass
+1 -13
View File
@@ -6,14 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
from cartsnitch_api.config import settings
engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
pool_recycle=3600,
)
engine = create_async_engine(settings.database_url, echo=False)
async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
@@ -21,8 +14,3 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""FastAPI dependency that yields an async DB session."""
async with async_session_factory() as session:
yield session
async def dispose_engine() -> None:
"""Dispose the module-level database engine, closing all pooled connections.

Takes no arguments and returns nothing; it only awaits ``engine.dispose()``.
"""
await engine.dispose()
+2 -5
View File
@@ -5,8 +5,6 @@ from contextlib import asynccontextmanager
from fastapi import APIRouter, FastAPI
from cartsnitch_api.auth.routes import router as auth_router
from cartsnitch_api.cache import cache_client
from cartsnitch_api.database import dispose_engine
from cartsnitch_api.middleware.cors import add_cors_middleware
from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware
from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware
@@ -25,10 +23,9 @@ from cartsnitch_api.routes.user import router as user_router
@asynccontextmanager
async def lifespan(app: FastAPI):
await cache_client.initialize()
# TODO: initialize DB session pool, Redis connection, service clients
yield
await cache_client.close()
await dispose_engine()
# TODO: cleanup connections
def create_app() -> FastAPI:
+2 -2
View File
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
allow_methods=["*"],
allow_headers=["*"],
)
@@ -4,7 +4,6 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
"""
import hashlib
import time
from collections import defaultdict
from threading import Lock
@@ -72,8 +71,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
auth_header = request.headers.get("authorization", "")
if auth_header.startswith("Bearer "):
token = auth_header[7:]
token_hash = hashlib.sha256(token.encode()).hexdigest()
return f"token:{token_hash}", _auth_limiter
# Use last 16 chars of token as key to avoid storing full tokens
return f"token:{token[-16:]}", _auth_limiter
# Fallback to IP for unauthenticated non-public endpoints
return f"ip:{_get_client_ip(request)}", _public_limiter
+14 -5
View File
@@ -18,10 +18,14 @@ router = APIRouter(prefix="/public", tags=["public"])
@router.get("/trends/{product_id}", response_model=PublicTrendResponse)
async def public_price_trend(product_id: UUID, db: AsyncSession = Depends(get_db)):
async def public_price_trend(
product_id: UUID,
days: int = Query(90, ge=1, le=365),
db: AsyncSession = Depends(get_db),
):
svc = PublicService(db)
try:
return await svc.get_trend(product_id)
return await svc.get_trend(product_id, days=days)
except LookupError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail="Product not found"
@@ -31,6 +35,7 @@ async def public_price_trend(product_id: UUID, db: AsyncSession = Depends(get_db
@router.get("/store-comparison", response_model=PublicStoreComparisonResponse)
async def public_store_comparison(
product_ids: Annotated[list[UUID], Query(max_length=20)],
category: str | None = Query(None, max_length=100, pattern=r"^[a-zA-Z0-9 _-]+$"),
db: AsyncSession = Depends(get_db),
):
if not product_ids:
@@ -39,10 +44,14 @@ async def public_store_comparison(
detail="At least one product_id is required",
)
svc = PublicService(db)
return await svc.get_store_comparison(product_ids)
return await svc.get_store_comparison(product_ids, category=category)
@router.get("/inflation", response_model=PublicInflationResponse)
async def public_inflation(db: AsyncSession = Depends(get_db)):
async def public_inflation(
category: str | None = Query(None, max_length=100, pattern=r"^[a-zA-Z0-9 _-]+$"),
period: str = Query("all-time", pattern=r"^(all-time|1y|6m|3m|1m)$"),
db: AsyncSession = Depends(get_db),
):
svc = PublicService(db)
return await svc.get_inflation()
return await svc.get_inflation(category=category, period=period)
+42 -23
View File
@@ -1,5 +1,6 @@
"""Public service — unauthenticated price transparency endpoints."""
from datetime import date, timedelta
from uuid import UUID
from sqlalchemy import and_, func, select
@@ -13,7 +14,7 @@ class PublicService:
def __init__(self, db: AsyncSession) -> None:
self.db = db
async def get_trend(self, product_id: UUID) -> dict:
async def get_trend(self, product_id: UUID, days: int = 90) -> dict:
from cartsnitch_api.models import NormalizedProduct, PriceHistory
result = await self.db.execute(
@@ -23,9 +24,13 @@ class PublicService:
if not product:
raise LookupError("Product not found")
date_threshold = date.today() - timedelta(days=days)
prices_result = await self.db.execute(
select(PriceHistory)
.where(PriceHistory.normalized_product_id == product_id)
.where(
PriceHistory.normalized_product_id == product_id,
PriceHistory.observed_date >= date_threshold,
)
.options(selectinload(PriceHistory.store))
.order_by(PriceHistory.observed_date)
)
@@ -45,20 +50,25 @@ class PublicService:
],
}
async def get_store_comparison(self, product_ids: list[UUID]) -> dict:
async def get_store_comparison(
self, product_ids: list[UUID], category: str | None = None
) -> dict:
from cartsnitch_api.models import NormalizedProduct, PriceHistory
if not product_ids:
return {"products": []}
# Fetch all products in one query
prod_result = await self.db.execute(
select(NormalizedProduct).where(NormalizedProduct.id.in_(product_ids))
)
product_query = select(NormalizedProduct).where(NormalizedProduct.id.in_(product_ids))
if category:
product_query = product_query.where(NormalizedProduct.category == category)
prod_result = await self.db.execute(product_query)
products_by_id = {p.id: p for p in prod_result.scalars().all()}
# Latest prices for all requested products in one query
subq = latest_price_per_store(product_ids)
if not products_by_id:
return {"products": []}
filtered_product_ids = list(products_by_id.keys())
subq = latest_price_per_store(filtered_product_ids)
prices_result = await self.db.execute(
select(PriceHistory)
.join(
@@ -69,18 +79,17 @@ class PublicService:
PriceHistory.normalized_product_id == subq.c.normalized_product_id,
),
)
.where(PriceHistory.normalized_product_id.in_(product_ids))
.where(PriceHistory.normalized_product_id.in_(filtered_product_ids))
.options(selectinload(PriceHistory.store))
)
all_prices = prices_result.scalars().all()
# Group by product
prices_by_product: dict[UUID, list] = {}
for ph in all_prices:
prices_by_product.setdefault(ph.normalized_product_id, []).append(ph)
products = []
for pid in product_ids:
for pid in filtered_product_ids:
product = products_by_id.get(pid)
if not product:
continue
@@ -102,19 +111,29 @@ class PublicService:
return {"products": products}
async def get_inflation(self) -> dict:
async def get_inflation(self, category: str | None = None, period: str = "all-time") -> dict:
"""Aggregate price change stats. Compares average prices across periods."""
from cartsnitch_api.models import NormalizedProduct, PriceHistory
# Get average prices grouped by category for recent vs older data
result = await self.db.execute(
select(
NormalizedProduct.category,
func.avg(PriceHistory.regular_price),
)
.join(NormalizedProduct)
.group_by(NormalizedProduct.category)
)
date_threshold = None
if period != "all-time":
days_map = {"1y": 365, "6m": 180, "3m": 90, "1m": 30}
days = days_map.get(period, 365)
date_threshold = date.today() - timedelta(days=days)
query = select(
NormalizedProduct.category,
func.avg(PriceHistory.regular_price),
).join(NormalizedProduct)
if category:
query = query.where(NormalizedProduct.category == category)
if date_threshold:
query = query.where(PriceHistory.observed_date >= date_threshold)
query = query.group_by(NormalizedProduct.category)
result = await self.db.execute(query)
categories = {}
for row in result.all():
cat, avg_price = row
@@ -122,7 +141,7 @@ class PublicService:
categories[cat] = float(avg_price) if avg_price else 0.0
return {
"period": "all-time",
"period": period,
"cartsnitch_index": sum(categories.values()) / max(len(categories), 1),
"cpi_baseline": 100.0,
"categories": categories,
+1 -32
View File
@@ -1,10 +1,8 @@
"""Tests for rate limiting middleware."""
from unittest.mock import MagicMock
import pytest
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter
class TestSlidingWindowCounter:
@@ -55,32 +53,3 @@ async def test_health_skips_rate_limit(client):
resp = await client.get("/health")
assert resp.status_code == 200
assert "x-ratelimit-limit" not in resp.headers
class TestGetRateLimitKey:
    """Checks on the key `_get_rate_limit_key` derives from a bearer token."""

    def _make_request(self, auth_header: str = "") -> MagicMock:
        """Build a mock request for `/purchases`.

        The `authorization` header is present only when `auth_header` is
        non-empty; otherwise the headers mapping is empty.
        """
        headers = {}
        if auth_header:
            headers = {"authorization": auth_header}
        req = MagicMock()
        req.url.path = "/purchases"
        req.headers = headers
        return req

    def test_distinct_tokens_produce_distinct_keys(self):
        """Two different bearer tokens must map to different keys."""
        alpha_request = self._make_request("Bearer token_alpha_12345")
        beta_request = self._make_request("Bearer token_beta_67890")
        alpha_key, _ = _get_rate_limit_key(alpha_request)
        beta_key, _ = _get_rate_limit_key(beta_request)
        assert alpha_key != beta_key

    def test_same_token_produces_same_key(self):
        """The same bearer token must map to the same key on every call."""
        first_request = self._make_request("Bearer same_token_value_abc")
        second_request = self._make_request("Bearer same_token_value_abc")
        first_key, _ = _get_rate_limit_key(first_request)
        second_key, _ = _get_rate_limit_key(second_request)
        assert first_key == second_key

    def test_key_does_not_contain_raw_token_suffix(self):
        """Neither the full token nor its last 16 characters may appear in the key."""
        raw_token = "my_secret_jwt_token_xyz"
        req = self._make_request(f"Bearer {raw_token}")
        key, _ = _get_rate_limit_key(req)
        # Suffix first, then the whole token, matching the original check order.
        for fragment in (raw_token[-16:], raw_token):
            assert fragment not in key
+94
View File
@@ -71,3 +71,97 @@ async def test_public_inflation(client, public_data):
data = resp.json()
assert "categories" in data
assert "cartsnitch_index" in data
@pytest.mark.asyncio
async def test_trend_invalid_uuid(client):
    """A non-UUID product id yields 422 with a `detail` key and no `stack` key."""
    response = await client.get("/public/trends/not-a-uuid")
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_trend_days_zero(client, public_data):
    """`days=0` yields 422 with a `detail` key and no `stack` key."""
    pid = str(public_data["product"].id)
    url = f"/public/trends/{pid}?days=0"
    response = await client.get(url)
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_trend_days_negative(client, public_data):
    """`days=-1` yields 422 with a `detail` key and no `stack` key."""
    pid = str(public_data["product"].id)
    url = f"/public/trends/{pid}?days=-1"
    response = await client.get(url)
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_trend_days_over_max(client, public_data):
    """`days=999` yields 422 with a `detail` key and no `stack` key."""
    pid = str(public_data["product"].id)
    url = f"/public/trends/{pid}?days=999"
    response = await client.get(url)
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_trend_days_valid(client, public_data):
    """`days=30` yields 200 and a body that carries `product_name`."""
    pid = str(public_data["product"].id)
    url = f"/public/trends/{pid}?days=30"
    response = await client.get(url)
    assert response.status_code == 200
    body = response.json()
    assert "product_name" in body
@pytest.mark.asyncio
async def test_store_comparison_empty_list(client):
    """Calling store-comparison with no `product_ids` yields 400 with a `detail` key."""
    response = await client.get("/public/store-comparison")
    # NOTE(review): the route declares `product_ids` without a default, so the
    # framework may reject a missing parameter with 422 before the handler's
    # own "at least one product_id" check runs — confirm 400 is really what
    # comes back here.
    assert response.status_code == 400
    body = response.json()
    assert "detail" in body
@pytest.mark.asyncio
async def test_store_comparison_category_xss(client, public_data):
    """A `category` containing script markup yields 422, with `detail` and no `stack`."""
    pid = str(public_data["product"].id)
    url = f"/public/store-comparison?product_ids={pid}&category=<script>alert(1)</script>"
    response = await client.get(url)
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_store_comparison_category_sql_injection(client, public_data):
    """A `category` containing SQL-injection text yields 422, with `detail` and no `stack`."""
    pid = str(public_data["product"].id)
    url = f"/public/store-comparison?product_ids={pid}&category='; DROP TABLE--"
    response = await client.get(url)
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_inflation_invalid_period(client, public_data):
    """`period=10years` yields 422 with a `detail` key and no `stack` key."""
    response = await client.get("/public/inflation?period=10years")
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
@pytest.mark.asyncio
async def test_inflation_valid_periods(client, public_data):
    """Each of the five listed period values yields 200 from /public/inflation."""
    accepted_periods = ("all-time", "1y", "6m", "3m", "1m")
    for period in accepted_periods:
        response = await client.get(f"/public/inflation?period={period}")
        assert response.status_code == 200, f"period={period} failed"
@pytest.mark.asyncio
async def test_inflation_category_too_long(client, public_data):
    """A 200-character `category` yields 422 with a `detail` key and no `stack` key."""
    long_category = "x" * 200
    url = f"/public/inflation?category={long_category}"
    response = await client.get(url)
    assert response.status_code == 422
    body = response.json()
    assert "detail" in body
    assert "stack" not in body
+6 -12
View File
@@ -4,23 +4,17 @@ import pg from "pg";
const { Pool } = pg;
const pool = new Pool({
connectionString:
process.env.DATABASE_URL ??
"postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
});
const secret = process.env.BETTER_AUTH_SECRET;
if (!secret) {
throw new Error("BETTER_AUTH_SECRET environment variable is required");
}
const databaseUrl = process.env.DATABASE_URL;
if (!databaseUrl) {
console.warn(
"WARNING: DATABASE_URL is not set — using default localhost connection. " +
"Set DATABASE_URL for production deployments."
);
}
const pool = new Pool({
connectionString: databaseUrl ?? "postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
});
export const auth = betterAuth({
database: pool,
basePath: "/auth",
-6
View File
@@ -9,12 +9,6 @@ server {
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
gzip_min_length 256;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
# Health endpoint for K8s probes
location /health {
access_log off;
+3 -3
View File
@@ -9805,9 +9805,9 @@
}
},
"node_modules/vite": {
"version": "6.4.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"devOptional": true,
"license": "MIT",
"dependencies": {
+1 -34
View File
@@ -1,12 +1,8 @@
"""Service-specific configuration for ReceiptWitness."""
from pydantic import model_validator
from pydantic_settings import BaseSettings
_PLACEHOLDER_VALUES = {"change-me-in-production"}
class ReceiptWitnessSettings(BaseSettings):
model_config = {"env_prefix": "RW_"}
@@ -34,34 +30,5 @@ class ReceiptWitnessSettings(BaseSettings):
# Mailgun inbound email webhook
mailgun_webhook_signing_key: str = ""
@model_validator(mode="after")
def validate_required_vars(self):
errors = []
if not self.session_encryption_key or self.session_encryption_key in _PLACEHOLDER_VALUES:
errors.append(
"RW_SESSION_ENCRYPTION_KEY must be set to a secure value. "
'Generate one with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"'
)
if self.notifications_enabled and not self.resend_api_key:
errors.append(
"RW_RESEND_API_KEY must be set when RW_NOTIFICATIONS_ENABLED=true. "
"Get an API key from https://resend.com/api-keys"
)
if errors:
raise ValueError(
"ReceiptWitness startup failed — missing required config:\n"
+ "\n".join(f" - {e}" for e in errors)
)
return self
class _LazySettings:
_instance: ReceiptWitnessSettings | None = None
def __getattr__(self, name: str):
if _LazySettings._instance is None:
_LazySettings._instance = ReceiptWitnessSettings()
return getattr(_LazySettings._instance, name)
settings = _LazySettings()
settings = ReceiptWitnessSettings()
-4
View File
@@ -1,16 +1,12 @@
"""Shared test fixtures."""
import json
import os
from pathlib import Path
import pytest
FIXTURES_DIR = Path(__file__).parent / "fixtures"
os.environ.setdefault("RW_SESSION_ENCRYPTION_KEY", "test-secret-key-for-unit-tests-only-32bytes!")
os.environ.setdefault("RW_MAILGUN_WEBHOOK_SIGNING_KEY", "test-mailgun-signing-key")
@pytest.fixture
def meijer_receipt_data() -> dict:
-46
View File
@@ -1,46 +0,0 @@
import pytest
from receiptwitness.config import ReceiptWitnessSettings
def test_valid_config():
    """Settings built with only a session encryption key expose a truthy key."""
    key = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
    config = ReceiptWitnessSettings(session_encryption_key=key)
    assert config.session_encryption_key
def test_missing_session_encryption_key_raises():
    """An empty session key raises ValueError naming RW_SESSION_ENCRYPTION_KEY."""
    expect_failure = pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY")
    with expect_failure:
        ReceiptWitnessSettings(session_encryption_key="")
def test_placeholder_session_encryption_key_raises():
    """The placeholder session key raises ValueError naming RW_SESSION_ENCRYPTION_KEY."""
    expect_failure = pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY")
    with expect_failure:
        ReceiptWitnessSettings(session_encryption_key="change-me-in-production")
def test_notifications_enabled_without_resend_key_raises():
    """Notifications on with an empty Resend key raises ValueError naming RW_RESEND_API_KEY."""
    overrides = {
        "session_encryption_key": "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
        "notifications_enabled": True,
        "resend_api_key": "",
    }
    with pytest.raises(ValueError, match="RW_RESEND_API_KEY"):
        ReceiptWitnessSettings(**overrides)
def test_notifications_disabled_without_resend_key_ok():
    """With notifications off, an empty Resend key is accepted."""
    overrides = {
        "session_encryption_key": "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
        "notifications_enabled": False,
        "resend_api_key": "",
    }
    config = ReceiptWitnessSettings(**overrides)
    assert config.notifications_enabled is False
def test_notifications_enabled_with_resend_key_ok():
    """With notifications on and a Resend key supplied, the key is stored unchanged."""
    overrides = {
        "session_encryption_key": "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
        "notifications_enabled": True,
        "resend_api_key": "re_test_1234567890",
    }
    config = ReceiptWitnessSettings(**overrides)
    assert config.resend_api_key == "re_test_1234567890"