Compare commits
30 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 37c75c3887 | |||
| 8a0b2c03a1 | |||
| 5825174f0d | |||
| aa893d9cc1 | |||
| 91c062130c | |||
| 68e6be1985 | |||
| 0aef2455fd | |||
| c2a0263ddd | |||
| 6602b8c105 | |||
| bc5e03e7a0 | |||
| dbbc8d2e7b | |||
| 1267caf43c | |||
| 015401861a | |||
| 9891e1aefb | |||
| 69ad161e36 | |||
| 485f890df3 | |||
| bf3ed0ede3 | |||
| 3f41eb7346 | |||
| 6cbd1ef298 | |||
| 94214f762e | |||
| 562c6ef6f6 | |||
| ccc8189d88 | |||
| 86594e4a8e | |||
| c2f1a83c1d | |||
| 6f8e5a9577 | |||
| bbfa816e57 | |||
| 5904eb03a2 | |||
| 87b6433ff7 | |||
| d7c9938f7e | |||
| 02434060ee |
@@ -1,38 +0,0 @@
|
||||
"""Add GIN index on upc_variants and alter column to JSONB.
|
||||
|
||||
Revision ID: 009_add_gin_index_upc_variants
|
||||
Revises: 008_create_domain_tables
|
||||
Create Date: 2026-04-14
|
||||
"""
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
revision = "009_add_gin_index_upc_variants"
|
||||
down_revision = "008_create_domain_tables"
|
||||
branch_labels = None
|
||||
depends_on = None
|
||||
|
||||
|
||||
def upgrade() -> None:
|
||||
op.alter_column(
|
||||
"normalized_products",
|
||||
"upc_variants",
|
||||
type_=sa.dialects.postgresql.JSONB(),
|
||||
postgresql_using="upc_variants::jsonb",
|
||||
)
|
||||
op.create_index(
|
||||
"ix_normalized_products_upc_variants_gin",
|
||||
"normalized_products",
|
||||
["upc_variants"],
|
||||
postgresql_using="gin",
|
||||
)
|
||||
|
||||
|
||||
def downgrade() -> None:
|
||||
op.drop_index("ix_normalized_products_upc_variants_gin", table_name="normalized_products")
|
||||
op.alter_column(
|
||||
"normalized_products",
|
||||
"upc_variants",
|
||||
type_=sa.JSON(),
|
||||
)
|
||||
@@ -1,26 +1,51 @@
|
||||
"""Redis/DragonflyDB caching helpers."""
|
||||
|
||||
import redis.asyncio as redis
|
||||
|
||||
from cartsnitch_api.config import settings
|
||||
|
||||
|
||||
class CacheClient:
|
||||
"""Stub for Redis/DragonflyDB caching.
|
||||
"""Redis/DragonflyDB caching with connection pooling.
|
||||
|
||||
Will be used for expensive queries: price trends, product comparisons.
|
||||
Cache invalidation via Redis pub/sub events from other services.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.url = settings.redis_url
|
||||
self._pool: redis.ConnectionPool | None = None
|
||||
self._client: redis.Redis | None = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
"""Initialize the Redis connection pool."""
|
||||
self._pool = redis.ConnectionPool.from_url(
|
||||
settings.redis_url,
|
||||
max_connections=20,
|
||||
decode_responses=True,
|
||||
)
|
||||
self._client = redis.Redis(connection_pool=self._pool)
|
||||
|
||||
async def close(self) -> None:
|
||||
"""Close the Redis connection pool."""
|
||||
if self._client:
|
||||
await self._client.aclose()
|
||||
if self._pool:
|
||||
await self._pool.aclose()
|
||||
|
||||
async def get(self, key: str) -> str | None:
|
||||
# TODO: implement with redis-py async
|
||||
return None
|
||||
if not self._client:
|
||||
return None
|
||||
return await self._client.get(key)
|
||||
|
||||
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
|
||||
# TODO: implement with redis-py async
|
||||
pass
|
||||
if not self._client:
|
||||
return
|
||||
await self._client.set(key, value, ex=ttl_seconds)
|
||||
|
||||
async def delete(self, key: str) -> None:
|
||||
# TODO: implement with redis-py async
|
||||
pass
|
||||
if not self._client:
|
||||
return
|
||||
await self._client.delete(key)
|
||||
|
||||
|
||||
cache_client = CacheClient()
|
||||
|
||||
@@ -6,7 +6,14 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
|
||||
|
||||
from cartsnitch_api.config import settings
|
||||
|
||||
engine = create_async_engine(settings.database_url, echo=False)
|
||||
engine = create_async_engine(
|
||||
settings.database_url,
|
||||
echo=False,
|
||||
pool_size=10,
|
||||
max_overflow=20,
|
||||
pool_pre_ping=True,
|
||||
pool_recycle=3600,
|
||||
)
|
||||
async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
|
||||
|
||||
|
||||
@@ -14,3 +21,8 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
|
||||
"""FastAPI dependency that yields an async DB session."""
|
||||
async with async_session_factory() as session:
|
||||
yield session
|
||||
|
||||
|
||||
async def dispose_engine() -> None:
|
||||
"""Dispose the database engine, closing all pooled connections."""
|
||||
await engine.dispose()
|
||||
|
||||
@@ -5,6 +5,8 @@ from contextlib import asynccontextmanager
|
||||
from fastapi import APIRouter, FastAPI
|
||||
|
||||
from cartsnitch_api.auth.routes import router as auth_router
|
||||
from cartsnitch_api.cache import cache_client
|
||||
from cartsnitch_api.database import dispose_engine
|
||||
from cartsnitch_api.middleware.cors import add_cors_middleware
|
||||
from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware
|
||||
from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware
|
||||
@@ -23,9 +25,10 @@ from cartsnitch_api.routes.user import router as user_router
|
||||
|
||||
@asynccontextmanager
|
||||
async def lifespan(app: FastAPI):
|
||||
# TODO: initialize DB session pool, Redis connection, service clients
|
||||
await cache_client.initialize()
|
||||
yield
|
||||
# TODO: cleanup connections
|
||||
await cache_client.close()
|
||||
await dispose_engine()
|
||||
|
||||
|
||||
def create_app() -> FastAPI:
|
||||
|
||||
@@ -4,6 +4,7 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
|
||||
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import time
|
||||
from collections import defaultdict
|
||||
from threading import Lock
|
||||
@@ -71,8 +72,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
|
||||
auth_header = request.headers.get("authorization", "")
|
||||
if auth_header.startswith("Bearer "):
|
||||
token = auth_header[7:]
|
||||
# Use last 16 chars of token as key to avoid storing full tokens
|
||||
return f"token:{token[-16:]}", _auth_limiter
|
||||
token_hash = hashlib.sha256(token.encode()).hexdigest()
|
||||
return f"token:{token_hash}", _auth_limiter
|
||||
|
||||
# Fallback to IP for unauthenticated non-public endpoints
|
||||
return f"ip:{_get_client_ip(request)}", _public_limiter
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
"""Tests for rate limiting middleware."""
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter
|
||||
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key
|
||||
|
||||
|
||||
class TestSlidingWindowCounter:
|
||||
@@ -53,3 +55,32 @@ async def test_health_skips_rate_limit(client):
|
||||
resp = await client.get("/health")
|
||||
assert resp.status_code == 200
|
||||
assert "x-ratelimit-limit" not in resp.headers
|
||||
|
||||
|
||||
class TestGetRateLimitKey:
|
||||
def _make_request(self, auth_header: str = "") -> MagicMock:
|
||||
req = MagicMock()
|
||||
req.url.path = "/purchases"
|
||||
req.headers = {"authorization": auth_header} if auth_header else {}
|
||||
return req
|
||||
|
||||
def test_distinct_tokens_produce_distinct_keys(self):
|
||||
req1 = self._make_request("Bearer token_alpha_12345")
|
||||
req2 = self._make_request("Bearer token_beta_67890")
|
||||
key1, _ = _get_rate_limit_key(req1)
|
||||
key2, _ = _get_rate_limit_key(req2)
|
||||
assert key1 != key2
|
||||
|
||||
def test_same_token_produces_same_key(self):
|
||||
req1 = self._make_request("Bearer same_token_value_abc")
|
||||
req2 = self._make_request("Bearer same_token_value_abc")
|
||||
key1, _ = _get_rate_limit_key(req1)
|
||||
key2, _ = _get_rate_limit_key(req2)
|
||||
assert key1 == key2
|
||||
|
||||
def test_key_does_not_contain_raw_token_suffix(self):
|
||||
raw_token = "my_secret_jwt_token_xyz"
|
||||
req = self._make_request(f"Bearer {raw_token}")
|
||||
key, _ = _get_rate_limit_key(req)
|
||||
assert raw_token[-16:] not in key
|
||||
assert raw_token not in key
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
from typing import TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import JSON, String
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||
|
||||
from cartsnitch_common.constants import ProductCategory, SizeUnit
|
||||
@@ -27,9 +26,7 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
|
||||
brand: Mapped[str | None] = mapped_column(String(200))
|
||||
size: Mapped[str | None] = mapped_column(String(50))
|
||||
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
|
||||
upc_variants: Mapped[list[str] | None] = mapped_column(
|
||||
JSON().with_variant(JSONB(), "postgresql"), default=list
|
||||
)
|
||||
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
|
||||
|
||||
# Relationships
|
||||
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
|
||||
|
||||
Generated
+3
-3
@@ -9805,9 +9805,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/vite": {
|
||||
"version": "6.4.2",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
|
||||
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
|
||||
"version": "6.4.1",
|
||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
|
||||
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
||||
"devOptional": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
|
||||
@@ -5,14 +5,12 @@ Matches products across retailers by:
|
||||
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from enum import StrEnum
|
||||
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from sqlalchemy import cast, func, select, String
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy import select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
|
||||
@@ -98,24 +96,17 @@ def jaccard_similarity(a: str, b: str) -> float:
|
||||
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
|
||||
"""Find a normalized product by exact UPC match.
|
||||
|
||||
Uses PostgreSQL JSONB containment (@>) for production efficiency.
|
||||
Falls back to LIKE on SQLite for test compatibility.
|
||||
Loads products with upc_variants and checks membership in Python
|
||||
for cross-database compatibility (works on both PostgreSQL and SQLite).
|
||||
"""
|
||||
dialect_name = session.bind.dialect.name if session.bind else "default"
|
||||
if dialect_name == "postgresql":
|
||||
stmt = select(NormalizedProduct).where(
|
||||
cast(NormalizedProduct.upc_variants, JSONB).op("@>")(
|
||||
func.cast(json.dumps([upc]), JSONB)
|
||||
)
|
||||
)
|
||||
else:
|
||||
stmt = select(NormalizedProduct).where(
|
||||
NormalizedProduct.upc_variants.is_not(None),
|
||||
cast(NormalizedProduct.upc_variants, String).contains(upc),
|
||||
)
|
||||
product = session.execute(stmt).scalars().first()
|
||||
if product:
|
||||
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
|
||||
# TODO: Use PostgreSQL JSON containment query (@>) for production.
|
||||
# Current approach loads all products into memory — acceptable for tests
|
||||
# and small datasets, but will not scale.
|
||||
stmt = select(NormalizedProduct).where(NormalizedProduct.upc_variants.is_not(None))
|
||||
products = session.execute(stmt).scalars().all()
|
||||
for product in products:
|
||||
if product.upc_variants and upc in product.upc_variants:
|
||||
return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user