Compare commits

..

4 Commits

Author SHA1 Message Date
Paperclip 1aff898545 fix: update vite to 6.4.2 to patch audit vulnerabilities
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 14:31:02 +00:00
CartSnitch Engineer Bot 24f0dd0e67 fix: replace N+1 UPC query with SQL containment in normalization
- Add PostgreSQL JSONB containment (@>) query for match_by_upc
- Add SQLite LIKE fallback for test compatibility
- Update upc_variants column to JSONB with variant for cross-db support
- Add GIN index migration for upc_variants

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:59:28 +00:00
cartsnitch-cto[bot] da96ec7dc4 Merge pull request #172 from cartsnitch/fix/cors-security-headers
CTO review: LGTM. CORS methods restricted to explicit list (no TRACE/CONNECT), headers whitelisted, nginx security headers added (X-Frame-Options, X-Content-Type-Options, Referrer-Policy, CSP). Clean diff, CI green.
2026-04-14 11:57:52 +00:00
CartSnitch Engineer Bot 37798251be fix: restrict CORS to explicit methods and add security headers
- Replace allow_methods=["*"] with explicit list: GET, POST, PUT, DELETE, PATCH, OPTIONS
- Replace allow_headers=["*"] with explicit list: Content-Type, Authorization, Accept, Origin, X-Requested-With
- Add X-Frame-Options, X-Content-Type-Options, Referrer-Policy, CSP nginx headers

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-14 11:49:02 +00:00
9 changed files with 101 additions and 167 deletions
@@ -0,0 +1,38 @@
"""Add GIN index on upc_variants and alter column to JSONB.
Revision ID: 009_add_gin_index_upc_variants
Revises: 008_create_domain_tables
Create Date: 2026-04-14
"""
import sqlalchemy as sa
from alembic import op
revision = "009_add_gin_index_upc_variants"
down_revision = "008_create_domain_tables"
branch_labels = None
depends_on = None
def upgrade() -> None:
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.dialects.postgresql.JSONB(),
postgresql_using="upc_variants::jsonb",
)
op.create_index(
"ix_normalized_products_upc_variants_gin",
"normalized_products",
["upc_variants"],
postgresql_using="gin",
)
def downgrade() -> None:
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
op.alter_column(
"normalized_products",
"upc_variants",
type_=sa.JSON(),
)
+2 -2
View File
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
)
+5 -14
View File
@@ -18,14 +18,10 @@ router = APIRouter(prefix="/public", tags=["public"])
@router.get("/trends/{product_id}", response_model=PublicTrendResponse)
async def public_price_trend(
product_id: UUID,
days: int = Query(90, ge=1, le=365),
db: AsyncSession = Depends(get_db),
):
async def public_price_trend(product_id: UUID, db: AsyncSession = Depends(get_db)):
svc = PublicService(db)
try:
return await svc.get_trend(product_id, days=days)
return await svc.get_trend(product_id)
except LookupError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail="Product not found"
@@ -35,7 +31,6 @@ async def public_price_trend(
@router.get("/store-comparison", response_model=PublicStoreComparisonResponse)
async def public_store_comparison(
product_ids: Annotated[list[UUID], Query(max_length=20)],
category: str | None = Query(None, max_length=100, pattern=r"^[a-zA-Z0-9 _-]+$"),
db: AsyncSession = Depends(get_db),
):
if not product_ids:
@@ -44,14 +39,10 @@ async def public_store_comparison(
detail="At least one product_id is required",
)
svc = PublicService(db)
return await svc.get_store_comparison(product_ids, category=category)
return await svc.get_store_comparison(product_ids)
@router.get("/inflation", response_model=PublicInflationResponse)
async def public_inflation(
category: str | None = Query(None, max_length=100, pattern=r"^[a-zA-Z0-9 _-]+$"),
period: str = Query("all-time", pattern=r"^(all-time|1y|6m|3m|1m)$"),
db: AsyncSession = Depends(get_db),
):
async def public_inflation(db: AsyncSession = Depends(get_db)):
svc = PublicService(db)
return await svc.get_inflation(category=category, period=period)
return await svc.get_inflation()
+23 -42
View File
@@ -1,6 +1,5 @@
"""Public service — unauthenticated price transparency endpoints."""
from datetime import date, timedelta
from uuid import UUID
from sqlalchemy import and_, func, select
@@ -14,7 +13,7 @@ class PublicService:
def __init__(self, db: AsyncSession) -> None:
self.db = db
async def get_trend(self, product_id: UUID, days: int = 90) -> dict:
async def get_trend(self, product_id: UUID) -> dict:
from cartsnitch_api.models import NormalizedProduct, PriceHistory
result = await self.db.execute(
@@ -24,13 +23,9 @@ class PublicService:
if not product:
raise LookupError("Product not found")
date_threshold = date.today() - timedelta(days=days)
prices_result = await self.db.execute(
select(PriceHistory)
.where(
PriceHistory.normalized_product_id == product_id,
PriceHistory.observed_date >= date_threshold,
)
.where(PriceHistory.normalized_product_id == product_id)
.options(selectinload(PriceHistory.store))
.order_by(PriceHistory.observed_date)
)
@@ -50,25 +45,20 @@ class PublicService:
],
}
async def get_store_comparison(
self, product_ids: list[UUID], category: str | None = None
) -> dict:
async def get_store_comparison(self, product_ids: list[UUID]) -> dict:
from cartsnitch_api.models import NormalizedProduct, PriceHistory
if not product_ids:
return {"products": []}
product_query = select(NormalizedProduct).where(NormalizedProduct.id.in_(product_ids))
if category:
product_query = product_query.where(NormalizedProduct.category == category)
prod_result = await self.db.execute(product_query)
# Fetch all products in one query
prod_result = await self.db.execute(
select(NormalizedProduct).where(NormalizedProduct.id.in_(product_ids))
)
products_by_id = {p.id: p for p in prod_result.scalars().all()}
if not products_by_id:
return {"products": []}
filtered_product_ids = list(products_by_id.keys())
subq = latest_price_per_store(filtered_product_ids)
# Latest prices for all requested products in one query
subq = latest_price_per_store(product_ids)
prices_result = await self.db.execute(
select(PriceHistory)
.join(
@@ -79,17 +69,18 @@ class PublicService:
PriceHistory.normalized_product_id == subq.c.normalized_product_id,
),
)
.where(PriceHistory.normalized_product_id.in_(filtered_product_ids))
.where(PriceHistory.normalized_product_id.in_(product_ids))
.options(selectinload(PriceHistory.store))
)
all_prices = prices_result.scalars().all()
# Group by product
prices_by_product: dict[UUID, list] = {}
for ph in all_prices:
prices_by_product.setdefault(ph.normalized_product_id, []).append(ph)
products = []
for pid in filtered_product_ids:
for pid in product_ids:
product = products_by_id.get(pid)
if not product:
continue
@@ -111,29 +102,19 @@ class PublicService:
return {"products": products}
async def get_inflation(self, category: str | None = None, period: str = "all-time") -> dict:
async def get_inflation(self) -> dict:
"""Aggregate price change stats. Compares average prices across periods."""
from cartsnitch_api.models import NormalizedProduct, PriceHistory
date_threshold = None
if period != "all-time":
days_map = {"1y": 365, "6m": 180, "3m": 90, "1m": 30}
days = days_map.get(period, 365)
date_threshold = date.today() - timedelta(days=days)
query = select(
NormalizedProduct.category,
func.avg(PriceHistory.regular_price),
).join(NormalizedProduct)
if category:
query = query.where(NormalizedProduct.category == category)
if date_threshold:
query = query.where(PriceHistory.observed_date >= date_threshold)
query = query.group_by(NormalizedProduct.category)
result = await self.db.execute(query)
# Get average prices grouped by category for recent vs older data
result = await self.db.execute(
select(
NormalizedProduct.category,
func.avg(PriceHistory.regular_price),
)
.join(NormalizedProduct)
.group_by(NormalizedProduct.category)
)
categories = {}
for row in result.all():
cat, avg_price = row
@@ -141,7 +122,7 @@ class PublicService:
categories[cat] = float(avg_price) if avg_price else 0.0
return {
"period": period,
"period": "all-time",
"cartsnitch_index": sum(categories.values()) / max(len(categories), 1),
"cpi_baseline": 100.0,
"categories": categories,
-94
View File
@@ -71,97 +71,3 @@ async def test_public_inflation(client, public_data):
data = resp.json()
assert "categories" in data
assert "cartsnitch_index" in data
@pytest.mark.asyncio
async def test_trend_invalid_uuid(client):
resp = await client.get("/public/trends/not-a-uuid")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_trend_days_zero(client, public_data):
pid = str(public_data["product"].id)
resp = await client.get(f"/public/trends/{pid}?days=0")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_trend_days_negative(client, public_data):
pid = str(public_data["product"].id)
resp = await client.get(f"/public/trends/{pid}?days=-1")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_trend_days_over_max(client, public_data):
pid = str(public_data["product"].id)
resp = await client.get(f"/public/trends/{pid}?days=999")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_trend_days_valid(client, public_data):
pid = str(public_data["product"].id)
resp = await client.get(f"/public/trends/{pid}?days=30")
assert resp.status_code == 200
assert "product_name" in resp.json()
@pytest.mark.asyncio
async def test_store_comparison_empty_list(client):
resp = await client.get("/public/store-comparison")
assert resp.status_code == 400
assert "detail" in resp.json()
@pytest.mark.asyncio
async def test_store_comparison_category_xss(client, public_data):
pid = str(public_data["product"].id)
resp = await client.get(
f"/public/store-comparison?product_ids={pid}&category=<script>alert(1)</script>"
)
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_store_comparison_category_sql_injection(client, public_data):
pid = str(public_data["product"].id)
resp = await client.get(f"/public/store-comparison?product_ids={pid}&category='; DROP TABLE--")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_inflation_invalid_period(client, public_data):
resp = await client.get("/public/inflation?period=10years")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@pytest.mark.asyncio
async def test_inflation_valid_periods(client, public_data):
for period in ["all-time", "1y", "6m", "3m", "1m"]:
resp = await client.get(f"/public/inflation?period={period}")
assert resp.status_code == 200, f"period={period} failed"
@pytest.mark.asyncio
async def test_inflation_category_too_long(client, public_data):
long_category = "x" * 200
resp = await client.get(f"/public/inflation?category={long_category}")
assert resp.status_code == 422
assert "detail" in resp.json()
assert "stack" not in resp.json()
@@ -3,6 +3,7 @@
from typing import TYPE_CHECKING
from sqlalchemy import JSON, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import ProductCategory, SizeUnit
@@ -26,7 +27,9 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
brand: Mapped[str | None] = mapped_column(String(200))
size: Mapped[str | None] = mapped_column(String(50))
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
upc_variants: Mapped[list[str] | None] = mapped_column(
JSON().with_variant(JSONB(), "postgresql"), default=list
)
# Relationships
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
+6
View File
@@ -9,6 +9,12 @@ server {
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
gzip_min_length 256;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
# Health endpoint for K8s probes
location /health {
access_log off;
+3 -3
View File
@@ -9805,9 +9805,9 @@
}
},
"node_modules/vite": {
"version": "6.4.1",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
"version": "6.4.2",
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
"devOptional": true,
"license": "MIT",
"dependencies": {
@@ -5,12 +5,14 @@ Matches products across retailers by:
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
"""
import json
import re
from dataclasses import dataclass
from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct
from sqlalchemy import select
from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session
@@ -96,17 +98,24 @@ def jaccard_similarity(a: str, b: str) -> float:
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
"""Find a normalized product by exact UPC match.
Loads products with upc_variants and checks membership in Python
for cross-database compatibility (works on both PostgreSQL and SQLite).
Uses PostgreSQL JSONB containment (@>) for production efficiency.
Falls back to LIKE on SQLite for test compatibility.
"""
# TODO: Use PostgreSQL JSON containment query (@>) for production.
# Current approach loads all products into memory — acceptable for tests
# and small datasets, but will not scale.
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
products = session.execute(stmt).scalars().all()
for product in products:
if product.upc_variants and upc in product.upc_variants:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
dialect_name = session.bind.dialect.name if session.bind else "default"
if dialect_name == "postgresql":
stmt = select(NormalizedProduct).where(
cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
func.cast(json.dumps([upc]), JSONB)
)
)
else:
stmt = select(NormalizedProduct).where(
NormalizedProduct.upc_variants.is_not(None),
cast(NormalizedProduct.upc_variants, String).contains(upc),
)
product = session.execute(stmt).scalars().first()
if product:
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
return None