forked from cartsnitch/cartsnitch
Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 1aff898545 | |||
| 24f0dd0e67 | |||
| da96ec7dc4 | |||
| 37798251be | |||
| ee97f64db6 | |||
| 538a5f4f4d |
@@ -0,0 +1,38 @@
|
|||||||
|
"""Add GIN index on upc_variants and alter column to JSONB.
|
||||||
|
|
||||||
|
Revision ID: 009_add_gin_index_upc_variants
|
||||||
|
Revises: 008_create_domain_tables
|
||||||
|
Create Date: 2026-04-14
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
revision = "009_add_gin_index_upc_variants"
|
||||||
|
down_revision = "008_create_domain_tables"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Convert ``normalized_products.upc_variants`` to JSONB and index it.

    The column type is altered in place, with an explicit ``::jsonb`` cast
    applied to the existing values, and a GIN index is then created on the
    column.
    """
    # Import the dialect package explicitly: ``import sqlalchemy as sa`` on its
    # own does not guarantee that ``sa.dialects.postgresql`` is already loaded
    # and reachable as an attribute.
    from sqlalchemy.dialects import postgresql

    op.alter_column(
        "normalized_products",
        "upc_variants",
        type_=postgresql.JSONB(),
        postgresql_using="upc_variants::jsonb",
    )
    op.create_index(
        "ix_normalized_products_upc_variants_gin",
        "normalized_products",
        ["upc_variants"],
        postgresql_using="gin",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Drop the GIN index and change ``upc_variants`` back to JSON.

    The index is removed first, then the column type on
    ``normalized_products`` is altered.
    """
    op.drop_index(
        "ix_normalized_products_upc_variants_gin",
        table_name="normalized_products",
    )
    op.alter_column(
        "normalized_products",
        "upc_variants",
        type_=sa.JSON(),
        # Spell out the conversion, mirroring the explicit cast in upgrade(),
        # instead of relying on an implicit jsonb -> json cast.
        postgresql_using="upc_variants::json",
    )
|
||||||
@@ -13,14 +13,13 @@ class Settings(BaseSettings):
|
|||||||
)
|
)
|
||||||
redis_url: str = "redis://localhost:6379/0"
|
redis_url: str = "redis://localhost:6379/0"
|
||||||
|
|
||||||
jwt_secret_key: str = "change-me-in-production"
|
jwt_secret_key: str
|
||||||
jwt_algorithm: str = "HS256"
|
jwt_algorithm: str = "HS256"
|
||||||
jwt_access_token_expire_minutes: int = 15
|
jwt_access_token_expire_minutes: int = 15
|
||||||
jwt_refresh_token_expire_days: int = 7
|
jwt_refresh_token_expire_days: int = 7
|
||||||
|
|
||||||
service_key: str = "change-me-in-production"
|
service_key: str
|
||||||
# Valid Fernet key for local dev — MUST be overridden in production
|
fernet_key: str
|
||||||
fernet_key: str = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
|
|
||||||
|
|
||||||
auth_service_url: str = "http://auth:3001"
|
auth_service_url: str = "http://auth:3001"
|
||||||
|
|
||||||
@@ -35,9 +34,26 @@ class Settings(BaseSettings):
|
|||||||
rate_limit_window_seconds: int = 60
|
rate_limit_window_seconds: int = 60
|
||||||
rate_limit_enabled: bool = True
|
rate_limit_enabled: bool = True
|
||||||
|
|
||||||
|
_PLACEHOLDER_VALUES = {"change-me-in-production"}
|
||||||
|
|
||||||
@model_validator(mode="after")
|
@model_validator(mode="after")
|
||||||
def validate_fernet_key(self):
|
def validate_secrets(self):
|
||||||
"""Validate fernet_key is a valid 32-byte url-safe base64 key at startup."""
|
if not self.jwt_secret_key or self.jwt_secret_key in self._PLACEHOLDER_VALUES:
|
||||||
|
raise ValueError(
|
||||||
|
"CARTSNITCH_JWT_SECRET_KEY must be set to a secure value. "
|
||||||
|
'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
|
||||||
|
)
|
||||||
|
if not self.service_key or self.service_key in self._PLACEHOLDER_VALUES:
|
||||||
|
raise ValueError(
|
||||||
|
"CARTSNITCH_SERVICE_KEY must be set to a secure value. "
|
||||||
|
'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
|
||||||
|
)
|
||||||
|
if not self.fernet_key or self.fernet_key in self._PLACEHOLDER_VALUES:
|
||||||
|
raise ValueError(
|
||||||
|
"CARTSNITCH_FERNET_KEY must be set to a valid Fernet key. "
|
||||||
|
"Generate one with: python -c "
|
||||||
|
"'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'"
|
||||||
|
)
|
||||||
try:
|
try:
|
||||||
decoded = base64.urlsafe_b64decode(self.fernet_key.encode())
|
decoded = base64.urlsafe_b64decode(self.fernet_key.encode())
|
||||||
if len(decoded) != 32:
|
if len(decoded) != 32:
|
||||||
|
|||||||
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
|
|||||||
CORSMiddleware,
|
CORSMiddleware,
|
||||||
allow_origins=settings.cors_origins,
|
allow_origins=settings.cors_origins,
|
||||||
allow_credentials=True,
|
allow_credentials=True,
|
||||||
allow_methods=["*"],
|
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
|
||||||
allow_headers=["*"],
|
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
|
||||||
)
|
)
|
||||||
|
|||||||
+34
-7
@@ -19,6 +19,25 @@ from cartsnitch_api.database import get_db
|
|||||||
from cartsnitch_api.main import create_app
|
from cartsnitch_api.main import create_app
|
||||||
from cartsnitch_api.models import Base
|
from cartsnitch_api.models import Base
|
||||||
|
|
||||||
|
TEST_JWT_SECRET = secrets.token_urlsafe(32)
|
||||||
|
TEST_SERVICE_KEY = secrets.token_urlsafe(32)
|
||||||
|
TEST_FERNET_KEY = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def setup_test_settings():
    """Swap the three secret settings for test values around each test.

    The current values are captured before any override is applied and are
    written back, in the same order, after the test has run.
    """
    overrides = (
        ("jwt_secret_key", TEST_JWT_SECRET),
        ("service_key", TEST_SERVICE_KEY),
        ("fernet_key", TEST_FERNET_KEY),
    )
    # Snapshot every original value before touching any of them.
    saved = [(field, getattr(cartsnitch_settings, field)) for field, _ in overrides]
    for field, test_value in overrides:
        setattr(cartsnitch_settings, field, test_value)
    yield
    for field, original_value in saved:
        setattr(cartsnitch_settings, field, original_value)
|
||||||
|
|
||||||
|
|
||||||
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
|
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
|
||||||
|
|
||||||
|
|
||||||
@@ -60,7 +79,8 @@ async def db_engine():
|
|||||||
async with engine.begin() as conn:
|
async with engine.begin() as conn:
|
||||||
await conn.run_sync(Base.metadata.create_all)
|
await conn.run_sync(Base.metadata.create_all)
|
||||||
# Create Better-Auth tables (not managed by SQLAlchemy models)
|
# Create Better-Auth tables (not managed by SQLAlchemy models)
|
||||||
await conn.execute(text("""
|
await conn.execute(
|
||||||
|
text("""
|
||||||
CREATE TABLE IF NOT EXISTS sessions (
|
CREATE TABLE IF NOT EXISTS sessions (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
token TEXT NOT NULL UNIQUE,
|
token TEXT NOT NULL UNIQUE,
|
||||||
@@ -71,8 +91,10 @@ async def db_engine():
|
|||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
|
||||||
)
|
)
|
||||||
"""))
|
""")
|
||||||
await conn.execute(text("""
|
)
|
||||||
|
await conn.execute(
|
||||||
|
text("""
|
||||||
CREATE TABLE IF NOT EXISTS accounts (
|
CREATE TABLE IF NOT EXISTS accounts (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
user_id TEXT NOT NULL,
|
user_id TEXT NOT NULL,
|
||||||
@@ -88,8 +110,10 @@ async def db_engine():
|
|||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
|
||||||
)
|
)
|
||||||
"""))
|
""")
|
||||||
await conn.execute(text("""
|
)
|
||||||
|
await conn.execute(
|
||||||
|
text("""
|
||||||
CREATE TABLE IF NOT EXISTS verifications (
|
CREATE TABLE IF NOT EXISTS verifications (
|
||||||
id TEXT PRIMARY KEY,
|
id TEXT PRIMARY KEY,
|
||||||
identifier TEXT NOT NULL,
|
identifier TEXT NOT NULL,
|
||||||
@@ -98,7 +122,8 @@ async def db_engine():
|
|||||||
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
|
||||||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
|
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
|
||||||
)
|
)
|
||||||
"""))
|
""")
|
||||||
|
)
|
||||||
|
|
||||||
yield engine
|
yield engine
|
||||||
|
|
||||||
@@ -133,7 +158,9 @@ async def client(db_engine):
|
|||||||
app.dependency_overrides.clear()
|
app.dependency_overrides.clear()
|
||||||
|
|
||||||
|
|
||||||
async def _create_test_user_and_session(client: AsyncClient, db_engine, **user_overrides) -> tuple[dict, str]:
|
async def _create_test_user_and_session(
|
||||||
|
client: AsyncClient, db_engine, **user_overrides
|
||||||
|
) -> tuple[dict, str]:
|
||||||
"""Create a test user and a valid session directly in the DB.
|
"""Create a test user and a valid session directly in the DB.
|
||||||
|
|
||||||
Returns (user_dict, session_token). Better-Auth stores the raw token
|
Returns (user_dict, session_token). Better-Auth stores the raw token
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
from typing import TYPE_CHECKING
|
from typing import TYPE_CHECKING
|
||||||
|
|
||||||
from sqlalchemy import JSON, String
|
from sqlalchemy import JSON, String
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
||||||
|
|
||||||
from cartsnitch_common.constants import ProductCategory, SizeUnit
|
from cartsnitch_common.constants import ProductCategory, SizeUnit
|
||||||
@@ -26,7 +27,9 @@ class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
|
|||||||
brand: Mapped[str | None] = mapped_column(String(200))
|
brand: Mapped[str | None] = mapped_column(String(200))
|
||||||
size: Mapped[str | None] = mapped_column(String(50))
|
size: Mapped[str | None] = mapped_column(String(50))
|
||||||
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
|
size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))
|
||||||
upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
|
upc_variants: Mapped[list[str] | None] = mapped_column(
|
||||||
|
JSON().with_variant(JSONB(), "postgresql"), default=list
|
||||||
|
)
|
||||||
|
|
||||||
# Relationships
|
# Relationships
|
||||||
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
|
purchase_items: Mapped[list["PurchaseItem"]] = relationship(back_populates="normalized_product")
|
||||||
|
|||||||
@@ -9,6 +9,12 @@ server {
|
|||||||
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
|
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
|
||||||
gzip_min_length 256;
|
gzip_min_length 256;
|
||||||
|
|
||||||
|
# Security headers
|
||||||
|
add_header X-Frame-Options "SAMEORIGIN" always;
|
||||||
|
add_header X-Content-Type-Options "nosniff" always;
|
||||||
|
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
|
||||||
|
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
|
||||||
|
|
||||||
# Health endpoint for K8s probes
|
# Health endpoint for K8s probes
|
||||||
location /health {
|
location /health {
|
||||||
access_log off;
|
access_log off;
|
||||||
|
|||||||
Generated
+3
-3
@@ -9805,9 +9805,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/vite": {
|
"node_modules/vite": {
|
||||||
"version": "6.4.1",
|
"version": "6.4.2",
|
||||||
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.1.tgz",
|
"resolved": "https://registry.npmjs.org/vite/-/vite-6.4.2.tgz",
|
||||||
"integrity": "sha512-+Oxm7q9hDoLMyJOYfUYBuHQo+dkAloi33apOPP56pzj+vsdJDzr+j1NISE5pyaAuKL4A3UD34qd0lx5+kfKp2g==",
|
"integrity": "sha512-2N/55r4JDJ4gdrCvGgINMy+HH3iRpNIz8K6SFwVsA+JbQScLiC+clmAxBgwiSPgcG9U15QmvqCGWzMbqda5zGQ==",
|
||||||
"devOptional": true,
|
"devOptional": true,
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
|
|||||||
@@ -5,12 +5,14 @@ Matches products across retailers by:
|
|||||||
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
|
2. Fuzzy name matching via token-based Jaccard similarity (lower confidence)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
import re
|
import re
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
|
|
||||||
from cartsnitch_common.models.product import NormalizedProduct
|
from cartsnitch_common.models.product import NormalizedProduct
|
||||||
from sqlalchemy import select
|
from sqlalchemy import cast, func, select, String
|
||||||
|
from sqlalchemy.dialects.postgresql import JSONB
|
||||||
from sqlalchemy.orm import Session
|
from sqlalchemy.orm import Session
|
||||||
|
|
||||||
|
|
||||||
@@ -96,17 +98,24 @@ def jaccard_similarity(a: str, b: str) -> float:
|
|||||||
def match_by_upc(session: Session, upc: str) -> MatchResult | None:
    """Find a normalized product by exact UPC match.

    On PostgreSQL the lookup is a JSONB containment query (``@>``) evaluated
    in the database. On any other dialect (e.g. SQLite in tests) a LIKE query
    only narrows the candidates and the exact membership test is done in
    Python, because a substring match on the serialized JSON would also
    accept stored UPCs that merely contain ``upc``.

    Args:
        session: Session used to run the query; its bind selects the strategy.
        upc: UPC that must be an element of a product's ``upc_variants``.

    Returns:
        A ``MatchResult`` with confidence 1.0 and method ``MatchMethod.UPC``
        for the first matching product, or ``None`` when nothing matches.
    """
    dialect_name = session.bind.dialect.name if session.bind else "default"

    if dialect_name == "postgresql":
        # Pass the Python list itself so the JSONB bind type serializes it
        # exactly once. Binding an already-dumped JSON string would serialize
        # it a second time and compare against a JSON string scalar rather
        # than an array.
        stmt = select(NormalizedProduct).where(
            cast(NormalizedProduct.upc_variants, JSONB).contains([upc])
        )
        product = session.execute(stmt).scalars().first()
        if product is None:
            return None
        return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)

    # Fallback: LIKE is only a pre-filter here. autoescape keeps "%" and "_"
    # in the input from acting as wildcards.
    stmt = select(NormalizedProduct).where(
        NormalizedProduct.upc_variants.is_not(None),
        cast(NormalizedProduct.upc_variants, String).contains(upc, autoescape=True),
    )
    for product in session.execute(stmt).scalars().all():
        # Confirm exact element membership; the LIKE above matches substrings.
        if product.upc_variants and upc in product.upc_variants:
            return MatchResult(product=product, confidence=1.0, method=MatchMethod.UPC)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user