forked from cartsnitch/cartsnitch
Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1aff898545 | |||
| 24f0dd0e67 | |||
| da96ec7dc4 | |||
| 37798251be |
@@ -0,0 +1,38 @@
|
||||
"""Add GIN index on upc_variants and alter column to JSONB.
|
||||
|
||||
Revision ID: 009_add_gin_index_upc_variants
|
||||
Revises: 008_create_domain_tables
|
||||
Create Date: 2026-04-14
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision = "009_add_gin_index_upc_variants"
|
||||
down_revision = "008_create_domain_tables"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.alter_column(
|
||||
"normalized_products",
|
||||
"upc_variants",
|
||||
type_=sa.dialects.postgresql.JSONB(),
|
||||
postgresql_using="upc_variants::jsonb",
|
||||
)
|
||||
op.create_index(
|
||||
"ix_normalized_products_upc_variants_gin",
|
||||
"normalized_products",
|
||||
["upc_variants"],
|
||||
postgresql_using="gin",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
|
||||
op.alter_column(
|
||||
"normalized_products",
|
||||
"upc_variants",
|
||||
type_=sa.JSON(),
|
||||
)
|
||||
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
|
||||
CORSMiddleware,
|
||||
allow_origins=settings.cors_origins,
|
||||
allow_credentials=True,
|
||||
allow_methods=["*"],
|
||||
allow_headers=["*"],
|
||||
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
|
||||
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
|
||||
)
|
||||
|
||||
@@ -18,14 +18,10 @@ router = APIRouter(prefix="/public", tags=["public"])
|
||||
|
||||
|
||||
@router.get("/trends/{product_id}", response_model=PublicTrendResponse)
|
||||
async def public_price_trend(
|
||||
product_id: UUID,
|
||||
days: int = Query(90, ge=1, le=365),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
async def public_price_trend(product_id: UUID, db: AsyncSession = Depends(get_db)):
|
||||
svc = PublicService(db)
|
||||
try:
|
||||
return await svc.get_trend(product_id, days=days)
|
||||
return await svc.get_trend(product_id)
|
||||
except LookupError:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND, detail="Product not found"
|
||||
@@ -35,7 +31,6 @@ async def public_price_trend(
|
||||
@router.get("/store-comparison", response_model=PublicStoreComparisonResponse)
|
||||
async def public_store_comparison(
|
||||
product_ids: Annotated[list[UUID], Query(max_length=20)],
|
||||
category: str | None = Query(None, max_length=100, pattern=r"^[a-zA-Z0-9 _-]+$"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
if not product_ids:
|
||||
@@ -44,14 +39,10 @@ async def public_store_comparison(
|
||||
detail="At least one product_id is required",
|
||||
)
|
||||
svc = PublicService(db)
|
||||
return await svc.get_store_comparison(product_ids, category=category)
|
||||
return await svc.get_store_comparison(product_ids)
|
||||
|
||||
|
||||
@router.get("/inflation", response_model=PublicInflationResponse)
|
||||
async def public_inflation(
|
||||
category: str | None = Query(None, max_length=100, pattern=r"^[a-zA-Z0-9 _-]+$"),
|
||||
period: str = Query("all-time", pattern=r"^(all-time|1y|6m|3m|1m)$"),
|
||||
db: AsyncSession = Depends(get_db),
|
||||
):
|
||||
async def public_inflation(db: AsyncSession = Depends(get_db)):
|
||||
svc = PublicService(db)
|
||||
return await svc.get_inflation(category=category, period=period)
|
||||
return await svc.get_inflation()
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
"""Public service — unauthenticated price transparency endpoints."""
|
||||
|
||||
from datetime import date, timedelta
|
||||
from uuid import UUID
|
||||
|
||||
from sqlalchemy import and_, func, select
|
||||
@@ -14,7 +13,7 @@ class PublicService:
|
||||
def __init__(self, db: AsyncSession) -> None:
|
||||
self.db = db
|
||||
|
||||
async def get_trend(self, product_id: UUID, days: int = 90) -> dict:
|
||||
async def get_trend(self, product_id: UUID) -> dict:
|
||||
from cartsnitch_api.models import NormalizedProduct, PriceHistory
|
||||
|
||||
result = await self.db.execute(
|
||||
@@ -24,13 +23,9 @@ class PublicService:
|
||||
if not product:
|
||||
raise LookupError("Product not found")
|
||||
|
||||
date_threshold = date.today() - timedelta(days=days)
|
||||
prices_result = await self.db.execute(
|
||||
select(PriceHistory)
|
||||
.where(
|
||||
PriceHistory.normalized_product_id == product_id,
|
||||
PriceHistory.observed_date >= date_threshold,
|
||||
)
|
||||
.where(PriceHistory.normalized_product_id == product_id)
|
||||
.options(selectinload(PriceHistory.store))
|
||||
.order_by(PriceHistory.observed_date)
|
||||
)
|
||||
@@ -50,25 +45,20 @@ class PublicService:
|
||||
],
|
||||
}
|
||||
|
||||
async def get_store_comparison(
|
||||
self, product_ids: list[UUID], category: str | None = None
|
||||
) -> dict:
|
||||
async def get_store_comparison(self, product_ids: list[UUID]) -> dict:
|
||||
from cartsnitch_api.models import NormalizedProduct, PriceHistory
|
||||
|
||||
if not product_ids:
|
||||
return {"products": []}
|
||||
|
||||
product_query = select(NormalizedProduct).where(NormalizedProduct.id.in_(product_ids))
|
||||
if category:
|
||||
product_query = product_query.where(NormalizedProduct.category == category)
|
||||
prod_result = await self.db.execute(product_query)
|
||||
# Fetch all products in one query
|
||||
prod_result = await self.db.execute(
|
||||
select(NormalizedProduct).where(NormalizedProduct.id.in_(product_ids))
|
||||
)
|
||||
products_by_id = {p.id: p for p in prod_result.scalars().all()}
|
||||
|
||||
if not products_by_id:
|
||||
return {"products": []}
|
||||
|
||||
filtered_product_ids = list(products_by_id.keys())
|
||||
subq = latest_price_per_store(filtered_product_ids)
|
||||
# Latest prices for all requested products in one query
|
||||
subq = latest_price_per_store(product_ids)
|
||||
prices_result = await self.db.execute(
|
||||
select(PriceHistory)
|
||||
.join(
|
||||
@@ -79,17 +69,18 @@ class PublicService:
|
||||
PriceHistory.normalized_product_id == subq.c.normalized_product_id,
|
||||
),
|
||||
)
|
||||
.where(PriceHistory.normalized_product_id.in_(filtered_product_ids))
|
||||
.where(PriceHistory.normalized_product_id.in_(product_ids))
|
||||
.options(selectinload(PriceHistory.store))
|
||||
)
|
||||
all_prices = prices_result.scalars().all()
|
||||
|
||||
# Group by product
|
||||
prices_by_product: dict[UUID, list] = {}
|
||||
for ph in all_prices:
|
||||
prices_by_product.setdefault(ph.normalized_product_id, []).append(ph)
|
||||
|
||||
products = []
|
||||
for pid in filtered_product_ids:
|
||||
for pid in product_ids:
|
||||
product = products_by_id.get(pid)
|
||||
if not product:
|
||||
continue
|
||||
@@ -111,29 +102,19 @@ class PublicService:
|
||||
|
||||
return {"products": products}
|
||||
|
||||
async def get_inflation(self, category: str | None = None, period: str = "all-time") -> dict:
|
||||
async def get_inflation(self) -> dict:
|
||||
"""Aggregate price change stats. Compares average prices across periods."""
|
||||
from cartsnitch_api.models import NormalizedProduct, PriceHistory
|
||||
|
||||
date_threshold = None
|
||||
if period != "all-time":
|
||||
days_map = {"1y": 365, "6m": 180, "3m": 90, "1m": 30}
|
||||
days = days_map.get(period, 365)
|
||||
date_threshold = date.today() - timedelta(days=days)
|
||||
|
||||
query = select(
|
||||
NormalizedProduct.category,
|
||||
func.avg(PriceHistory.regular_price),
|
||||
).join(NormalizedProduct)
|
||||
|
||||
if category:
|
||||
query = query.where(NormalizedProduct.category == category)
|
||||
if date_threshold:
|
||||
query = query.where(PriceHistory.observed_date >= date_threshold)
|
||||
|
||||
query = query.group_by(NormalizedProduct.category)
|
||||
|
||||
result = await self.db.execute(query)
|
||||
# Get average prices grouped by category for recent vs older data
|
||||
result = await self.db.execute(
|
||||
select(
|
||||
NormalizedProduct.category,
|
||||
func.avg(PriceHistory.regular_price),
|
||||
)
|
||||
.join(NormalizedProduct)
|
||||
.group_by(NormalizedProduct.category)
|
||||
)
|
||||
categories = {}
|
||||
for row in result.all():
|
||||
cat, avg_price = row
|
||||
@@ -141,7 +122,7 @@ class PublicService:
|
||||
categories[cat] = float(avg_price) if avg_price else 0.0
|
||||
|
||||
return {
|
||||
"period": period,
|
||||
"period": "all-time",
|
||||
"cartsnitch_index": sum(categories.values()) / max(len(categories), 1),
|
||||
"cpi_baseline": 100.0,
|
||||
"categories": categories,
|
||||
|
||||
@@ -71,97 +71,3 @@ async def test_public_inflation(client, public_data):
|
||||
data = resp.json()
|
||||
assert "categories" in data
|
||||
assert "cartsnitch_index" in data
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_trend_invalid_uuid(client):
|
||||
resp = await client.get("/public/trends/not-a-uuid")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_trend_days_zero(client, public_data):
|
||||
pid = str(public_data["product"].id)
|
||||
resp = await client.get(f"/public/trends/{pid}?days=0")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_trend_days_negative(client, public_data):
|
||||
pid = str(public_data["product"].id)
|
||||
resp = await client.get(f"/public/trends/{pid}?days=-1")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_trend_days_over_max(client, public_data):
|
||||
pid = str(public_data["product"].id)
|
||||
resp = await client.get(f"/public/trends/{pid}?days=999")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_trend_days_valid(client, public_data):
|
||||
pid = str(public_data["product"].id)
|
||||
resp = await client.get(f"/public/trends/{pid}?days=30")
|
||||
assert resp.status_code == 200
|
||||
assert "product_name" in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_comparison_empty_list(client):
|
||||
resp = await client.get("/public/store-comparison")
|
||||
assert resp.status_code == 400
|
||||
assert "detail" in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_comparison_category_xss(client, public_data):
|
||||
pid = str(public_data["product"].id)
|
||||
resp = await client.get(
|
||||
f"/public/store-comparison?product_ids={pid}&category=<script>alert(1)</script>"
|
||||
)
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_store_comparison_category_sql_injection(client, public_data):
|
||||
pid = str(public_data["product"].id)
|
||||
resp = await client.get(f"/public/store-comparison?product_ids={pid}&category='; DROP TABLE--")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inflation_invalid_period(client, public_data):
|
||||
resp = await client.get("/public/inflation?period=10years")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inflation_valid_periods(client, public_data):
|
||||
for period in ["all-time", "1y", "6m", "3m", "1m"]:
|
||||
resp = await client.get(f"/public/inflation?period={period}")
|
||||
assert resp.status_code == 200, f"period={period} failed"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_inflation_category_too_long(client, public_data):
|
||||
long_category = "x" * 200
|
||||
resp = await client.get(f"/public/inflation?category={long_category}")
|
||||
assert resp.status_code == 422
|
||||
assert "detail" in resp.json()
|
||||
assert "stack" not in resp.json()
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import JSON, String
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from cartsnitch_common.constants import ProductCategory, SizeUnit
|
||||
@@ -26,7 +27,9 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
|
||||
brand: Mapped[str | None] = mapped_column(String(200))
|
||||
size: Mapped[str | None] = mapped_column(String(50))
|
||||
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
|
||||
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
|
||||
upc_variants: Mapped[list[str] | None] = mapped_column(
|
||||
JSON().with_variant(JSONB(), "postgresql"), default=list
|
||||
)
|
||||
|
||||
# Relationships
|
||||
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
|
||||
|
||||
@@ -9,6 +9,12 @@ server {
|
||||
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
|
||||
gzip_min_length 256;
|
||||
|
||||
# Security headers
|
||||
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||
add_header X-Content-Type-Options "nosniff" always;
|
||||
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
|
||||
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
|
||||
|
||||
# Health endpoint for K8s probes
|
||||
location /health {
|
||||
access_log off;
|
||||
|
||||
Generated
+3
-3
@@ -9805,9 +9805,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.4.1",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
|
||||
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
||||
"version": "6.4.2",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
|
||||
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
||||
@@ -5,12 +5,14 @@ Matches products across retailers by:
|
||||
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import StrEnum
|
||||
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy import cast, func, select, String
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
@@ -96,17 +98,24 @@ def jaccard_similarity(a: str, b: str) -> float:
|
||||
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
|
||||
"""Find a normalized product by exact UPC match.
|
||||
|
||||
Loads products with upc_variants and checks membership in Python
|
||||
for cross-database compatibility (works on both PostgreSQL and SQLite).
|
||||
Uses PostgreSQL JSONB containment (@>) for production efficiency.
|
||||
Falls back to LIKE on SQLite for test compatibility.
|
||||
"""
|
||||
# TODO: Use PostgreSQL JSON containment query (@>) for production.
|
||||
# Current approach loads all products into memory — acceptable for tests
|
||||
# and small datasets, but will not scale.
|
||||
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
|
||||
products = session.execute(stmt).scalars().all()
|
||||
for product in products:
|
||||
if product.upc_variants and upc in product.upc_variants:
|
||||
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
|
||||
dialect_name = session.bind.dialect.name if session.bind else "default"
|
||||
if dialect_name == "postgresql":
|
||||
stmt = select(NormalizedProduct).where(
|
||||
cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
|
||||
func.cast(json.dumps([upc]), JSONB)
|
||||
)
|
||||
)
|
||||
else:
|
||||
stmt = select(NormalizedProduct).where(
|
||||
NormalizedProduct.upc_variants.is_not(None),
|
||||
cast(NormalizedProduct.upc_variants, String).contains(upc),
|
||||
)
|
||||
product = session.execute(stmt).scalars().first()
|
||||
if product:
|
||||
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user