Compare commits

..

1 Commits

Author SHA1 Message Date
CartSnitch Engineer Bot bc5e03e7a0 fix(security): use SHA-256 hash for rate limit key instead of token suffix
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:36:17 +00:00
8 changed files with 52 additions and 76 deletions
@@ -1,38 +0,0 @@
"""Add GIN index on upc_variants and alter column to JSONB.
Revision ID: 009_add_gin_index_upc_variants
Revises: 008_create_domain_tables
Create Date: 2026-04-14
"""
import sqlalchemy as sa
from alembic import op
revision = "009_add_gin_index_upc_variants"
down_revision = "008_create_domain_tables"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.dialects.postgresql.JSONB(),
postgresql_using="upc_variants::jsonb",
)
op.create_index(
"ix_normalized_products_upc_variants_gin",
"normalized_products",
["upc_variants"],
postgresql_using="gin",
)
def downgrade() -> None:
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.JSON(),
)
+2 -2
View File
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
allow_methods=["*"],
allow_headers=["*"],
)
@@ -4,6 +4,7 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
"""
import hashlib
import time
from collections import defaultdict
from threading import Lock
@@ -71,8 +72,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
auth_header = request.headers.get("authorization", "")
if auth_header.startswith("Bearer "):
token = auth_header[7:]
# Use last 16 chars of token as key to avoid storing full tokens
return f"token:{token[-16:]}", _auth_limiter
token_hash = hashlib.sha256(token.encode()).hexdigest()
return f"token:{token_hash}", _auth_limiter
# Fallback to IP for unauthenticated non-public endpoints
return f"ip:{_get_client_ip(request)}", _public_limiter
+32 -1
View File
@@ -1,8 +1,10 @@
"""Tests for rate limiting middleware."""
from unittest.mock import MagicMock
import pytest
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key
class TestSlidingWindowCounter:
@@ -53,3 +55,32 @@ async def test_health_skips_rate_limit(client):
resp = await client.get("/health")
assert resp.status_code == 200
assert "x-ratelimit-limit" not in resp.headers
class TestGetRateLimitKey:
def _make_request(self, auth_header: str = "") -> MagicMock:
req = MagicMock()
req.url.path = "/purchases"
req.headers = {"authorization": auth_header} if auth_header else {}
return req
def test_distinct_tokens_produce_distinct_keys(self):
req1 = self._make_request("Bearer token_alpha_12345")
req2 = self._make_request("Bearer token_beta_67890")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 != key2
def test_same_token_produces_same_key(self):
req1 = self._make_request("Bearer same_token_value_abc")
req2 = self._make_request("Bearer same_token_value_abc")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 == key2
def test_key_does_not_contain_raw_token_suffix(self):
raw_token = "my_secret_jwt_token_xyz"
req = self._make_request(f"Bearer {raw_token}")
key, _ = _get_rate_limit_key(req)
assert raw_token[-16:] not in key
assert raw_token not in key
@@ -3,7 +3,6 @@
from typing import TYPE_CHECKING
from sqlalchemy import JSON, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import ProductCategory, SizeUnit
@@ -27,9 +26,7 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
brand: Mapped[str | None] = mapped_column(String(200))
size: Mapped[str | None] = mapped_column(String(50))
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
upc_variants: Mapped[list[str] | None] = mapped_column(
JSON().with_variant(JSONB(), "postgresql"), default=list
)
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
# Relationships
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
-6
View File
@@ -9,12 +9,6 @@ server {
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
gzip_min_length 256;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
# Health endpoint for K8s probes
location /health {
access_log off;
+3 -3
View File
@@ -9805,9 +9805,9 @@
}
},
"node_modules/vite": {
"version": "6.4.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"devOptional": true,
"license": "MIT",
"dependencies": {
@@ -5,14 +5,12 @@ Matches products across retailers by:
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
"""
import json
import re
from dataclasses import dataclass
from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct
from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy import select
from sqlalchemy.orm import Session
@@ -98,24 +96,17 @@ def jaccard_similarity(a: str, b: str) -> float:
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
"""Find a normalized product by exact UPC match.
Uses PostgreSQL JSONB containment (@>) for production efficiency.
Falls back to LIKE on SQLite for test compatibility.
Loads products with upc_variants and checks membership in Python
for cross-database compatibility (works on both PostgreSQL and SQLite).
"""
dialect_name = session.bind.dialect.name if session.bind else "default"
if dialect_name == "postgresql":
stmt = select(NormalizedProduct).where(
cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
func.cast(json.dumps([upc]), JSONB)
)
)
else:
stmt = select(NormalizedProduct).where(
NormalizedProduct.upc_variants.is_not(None),
cast(NormalizedProduct.upc_variants, String).contains(upc),
)
product = session.execute(stmt).scalars().first()
if product:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
# TODO: Use PostgreSQL JSON containment query (@>) for production.
# Current approach loads all products into memory — acceptable for tests
# and small datasets, but will not scale.
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
products = session.execute(stmt).scalars().all()
for product in products:
if product.upc_variants and upc in product.upc_variants:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
return None