From f47da487da7eb40a585da3862f46d6ef655106f1 Mon Sep 17 00:00:00 2001 From: Barcode Betty Date: Sun, 19 Apr 2026 12:18:11 +0000 Subject: [PATCH] feat: migrate receiptwitness to standalone repo with inlined common Extract receiptwitness/ from the monorepo into cartsnitch/receiptwitness. Inline the consumed modules from cartsnitch-common so there is no cross-repo dependency. - Add src/receiptwitness/shared/ with inlined models, schemas, constants, database - Update all imports from cartsnitch_common to receiptwitness.shared - Remove cartsnitch-common dependency from pyproject.toml - Copy and update Alembic config (alembic.ini, alembic/) - Update Dockerfile for standalone build context, add migration CMD - Add CI workflow with lint, test, build, grype scan, deploy-dev, deploy-uat - Add .grype.yaml Co-Authored-By: Paperclip --- .github/workflows/ci.yml | 212 ++++++++++++++++++ .grype.yaml | 4 + CLAUDE.md | 6 +- Dockerfile | 20 +- README.md | 29 +++ alembic.ini | 36 +++ alembic/env.py | 51 +++++ alembic/script.py.mako | 25 +++ .../versions/001_add_email_inbound_token.py | 37 +++ pyproject.toml | 7 +- src/receiptwitness/events.py | 4 +- src/receiptwitness/pipeline/matching.py | 6 +- src/receiptwitness/pipeline/normalization.py | 2 +- src/receiptwitness/pipeline/receipt.py | 2 +- src/receiptwitness/shared/__init__.py | 5 + src/receiptwitness/shared/config.py | 18 ++ src/receiptwitness/shared/constants.py | 85 +++++++ src/receiptwitness/shared/database.py | 45 ++++ src/receiptwitness/shared/models/__init__.py | 23 ++ src/receiptwitness/shared/models/base.py | 30 +++ src/receiptwitness/shared/models/product.py | 26 +++ .../shared/models/stub_purchase.py | 64 ++++++ .../shared/models/stub_store.py | 39 ++++ src/receiptwitness/shared/models/user.py | 63 ++++++ src/receiptwitness/shared/schemas/__init__.py | 5 + src/receiptwitness/shared/schemas/purchase.py | 73 ++++++ src/receiptwitness/worker/email_worker.py | 4 +- tests/test_pipeline/conftest.py | 2 +- 
tests/test_pipeline/test_matching.py | 6 +- tests/test_pipeline/test_normalization.py | 2 +- 30 files changed, 898 insertions(+), 33 deletions(-) create mode 100644 .github/workflows/ci.yml create mode 100644 .grype.yaml create mode 100644 README.md create mode 100644 alembic.ini create mode 100644 alembic/env.py create mode 100644 alembic/script.py.mako create mode 100644 alembic/versions/001_add_email_inbound_token.py create mode 100644 src/receiptwitness/shared/__init__.py create mode 100644 src/receiptwitness/shared/config.py create mode 100644 src/receiptwitness/shared/constants.py create mode 100644 src/receiptwitness/shared/database.py create mode 100644 src/receiptwitness/shared/models/__init__.py create mode 100644 src/receiptwitness/shared/models/base.py create mode 100644 src/receiptwitness/shared/models/product.py create mode 100644 src/receiptwitness/shared/models/stub_purchase.py create mode 100644 src/receiptwitness/shared/models/stub_store.py create mode 100644 src/receiptwitness/shared/models/user.py create mode 100644 src/receiptwitness/shared/schemas/__init__.py create mode 100644 src/receiptwitness/shared/schemas/purchase.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..ddc3c49 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,212 @@ +name: CI + +on: + push: + branches: [main, dev, uat] + pull_request: + branches: [main, dev, uat] + +concurrency: + group: ci-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: write + packages: write + +env: + REGISTRY: ghcr.io + IMAGE_NAME: cartsnitch/receiptwitness + +jobs: + lint: + runs-on: runners-cartsnitch + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install dependencies + run: pip install ruff + - name: Lint + run: ruff check src/ tests/ + + test: + runs-on: runners-cartsnitch + steps: + - uses: actions/checkout@v4 + - name: Set up 
Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + - name: Install dependencies + run: pip install -e ".[dev]" + - name: Run tests + run: pytest tests/ -v + + build-and-push: + runs-on: runners-cartsnitch + if: github.event_name == 'push' + needs: [lint, test] + outputs: + calver_tag: ${{ steps.calver.outputs.version }} + sha_tag: sha-${{ github.sha }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Generate CalVer tag + id: calver + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: | + DATE_TAG=$(date -u +%Y.%m.%d) + EXISTING=$(git tag -l "v${DATE_TAG}*" | sort -V | tail -1) + if [ -z "$EXISTING" ]; then VERSION="$DATE_TAG" + elif [ "$EXISTING" = "v${DATE_TAG}" ]; then VERSION="${DATE_TAG}.2" + else BUILD_NUM=$(echo "$EXISTING" | sed "s/v${DATE_TAG}\.//"); VERSION="${DATE_TAG}.$((BUILD_NUM + 1))"; fi + echo "version=$VERSION" >> "$GITHUB_OUTPUT" + echo "CalVer tag: $VERSION" + + - name: Log in to GHCR + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=sha,prefix=sha-,format=long + type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }} + type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }} + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: . 
+ push: ${{ github.event_name == 'push' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + build-args: APT_CACHE_BUST=${{ github.run_id }} + + - name: Create git tag + if: github.event_name == 'push' && github.ref == 'refs/heads/main' + run: | + git tag "v${{ steps.calver.outputs.version }}" + git push origin "v${{ steps.calver.outputs.version }}" + + grype: + runs-on: runners-cartsnitch + needs: [build-and-push] + if: github.event_name == 'push' + steps: + - uses: actions/checkout@v4 + - name: Generate SBOM + uses: anchore/sbom-action@v0 + with: + image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }} + format: spdx-json + output-file: sbom.spdx.json + - name: Upload SBOM + uses: actions/upload-artifact@v4 + with: + name: sbom + path: sbom.spdx.json + - name: Run Grype + uses: anchore/scan-action@v6 # Grype scanner; reads .grype.yaml from the checked-out workspace + with: + sbom: sbom.spdx.json + fail-build: true + severity-cutoff: high + + deploy-dev: + runs-on: runners-cartsnitch + needs: [grype] + if: always() && !cancelled() && github.event_name == 'push' && github.ref == 'refs/heads/dev' + steps: + - name: Generate GitHub App token + id: app-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.CARTSNITCH_APP_ID }} + private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }} + owner: ${{ github.repository_owner }} + repositories: infra + + - name: Checkout infra repo + uses: actions/checkout@v4 + with: + repository: cartsnitch/infra + token: ${{ steps.app-token.outputs.token }} + ref: main + path: infra + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + + - name: Install kustomize + uses: imranismail/setup-kustomize@v2 + + - name: Update receiptwitness image tag + run: | + cd infra/apps/overlays/dev + kustomize edit set image ghcr.io/cartsnitch/receiptwitness:sha-${{ github.sha }} + + - name: Commit and push to infra + run: | + cd infra + git config user.name "cartsnitch-ci[bot]" + git config user.email
"cartsnitch-ci[bot]@users.noreply.github.com" + git add apps/overlays/dev/kustomization.yaml + git commit -m "ci(dev): update receiptwitness to sha-${{ github.sha }}" + git pull --rebase origin main + git push origin main + + deploy-uat: + runs-on: runners-cartsnitch + needs: [grype] + if: always() && !cancelled() && github.event_name == 'push' && github.ref == 'refs/heads/uat' + steps: + - name: Generate GitHub App token + id: app-token + uses: actions/create-github-app-token@v1 + with: + app-id: ${{ secrets.CARTSNITCH_APP_ID }} + private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }} + owner: ${{ github.repository_owner }} + repositories: infra + + - name: Checkout infra repo + uses: actions/checkout@v4 + with: + repository: cartsnitch/infra + token: ${{ steps.app-token.outputs.token }} + ref: main + path: infra + + - name: Install kubectl + uses: azure/setup-kubectl@v4 + + - name: Install kustomize + uses: imranismail/setup-kustomize@v2 + + - name: Update receiptwitness image tag + run: | + cd infra/apps/overlays/uat + kustomize edit set image ghcr.io/cartsnitch/receiptwitness:sha-${{ github.sha }} + + - name: Commit and push to infra + run: | + cd infra + git config user.name "cartsnitch-ci[bot]" + git config user.email "cartsnitch-ci[bot]@users.noreply.github.com" + git add apps/overlays/uat/kustomization.yaml + git commit -m "ci(uat): update receiptwitness to sha-${{ github.sha }}" + git pull --rebase origin main + git push origin main diff --git a/.grype.yaml b/.grype.yaml new file mode 100644 index 0000000..001d21a --- /dev/null +++ b/.grype.yaml @@ -0,0 +1,4 @@ +ignore: + # Python 3.12 CVEs — only fixed in 3.13+, cannot upgrade major version safely + - vulnerability: CVE-2025-13836 + - vulnerability: CVE-2026-4519 \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 255b742..a29b841 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -11,7 +11,7 @@ CartSnitch is a self-hosted grocery price
intelligence platform built as a polyr | Repo | Service | Purpose | |------|---------|---------| -| `cartsnitch/common` | — | Shared models, schemas, utilities | +| `cartsnitch/common` | — | Shared models, schemas, utilities (extracted into individual service repos) | | `cartsnitch/receiptwitness` | ReceiptWitness | Purchase data ingestion via retailer scrapers (this repo) | | `cartsnitch/api` | API Gateway | Frontend-facing REST API | | `cartsnitch/cartsnitch` | Frontend | React PWA (mobile-first) | @@ -23,7 +23,7 @@ CartSnitch is a self-hosted grocery price intelligence platform built as a polyr ### Architecture Decisions - **Polyrepo:** Each service has its own repo, Dockerfile, CI/CD pipeline. -- **Shared DB:** One PostgreSQL cluster. This service writes to `purchases`, `purchase_items`, `price_history` tables. Models come from `cartsnitch-common`. +- **Shared DB:** One PostgreSQL cluster. This service writes to `purchases`, `purchase_items`, `price_history` tables. Models are inlined under `src/receiptwitness/shared/` (extracted from `cartsnitch-common` during the CAR-724 migration). - **Inter-service comms:** REST (synchronous) + Redis pub/sub (async events). - **Target scale:** 500–1,000 users. Each user has their own authenticated sessions to up to 3 retailers. 
@@ -60,7 +60,7 @@ ReceiptWitness authenticates with grocery retailer web portals using per-user se - Python 3.12+ - Playwright (Python async API) for headless browser automation - FastAPI (lightweight internal API for triggering scrapes, health checks, status) -- SQLAlchemy 2.0 (via `cartsnitch-common`) +- SQLAlchemy 2.0 (models inlined under `src/receiptwitness/shared/`) - Redis (pub/sub event publishing) - APScheduler or Celery (for scheduled scraping jobs) - cryptography / Fernet (encrypting stored session data) diff --git a/Dockerfile b/Dockerfile index 65418d2..8282c6f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,22 +3,18 @@ FROM python:3.12-slim AS build WORKDIR /app -# build-essential and libpq-dev are needed to compile any C-extension wheels -# (e.g. psycopg2 fallback). No git needed — common/ is copied from the repo root. ARG APT_CACHE_BUST=1 RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ libpq-dev \ build-essential \ && rm -rf /var/lib/apt/lists/* -# Build context is the repo root. These paths are relative to the root. -COPY receiptwitness/pyproject.toml ./ -COPY receiptwitness/src/ ./src/ -COPY common/ ./common/ +# Build context is the receiptwitness repo root. +COPY pyproject.toml ./ +COPY src/ ./src/ -# Install from the local common/ (cartsnitch-common>=0.1.0 in pyproject.toml -# will be satisfied by the local package) then install receiptwitness itself. -RUN pip install --no-cache-dir --prefix=/install ./common/ . +# Install receiptwitness (shared modules are inlined under src/receiptwitness/shared/). +RUN pip install --no-cache-dir --prefix=/install . 
# Stage 2: Production image with Playwright + Chromium FROM python:3.12-slim AS prod @@ -50,7 +46,9 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco RUN adduser --system --group --uid 1000 app COPY --from=build /install /usr/local -COPY receiptwitness/src/ ./src/ +COPY src/ ./src/ +COPY alembic.ini ./ +COPY alembic/ ./alembic/ # Install Playwright Chromium browser (runs as root; /opt/playwright is world-readable) RUN PLAYWRIGHT_BROWSERS_PATH=/opt/playwright playwright install chromium @@ -63,4 +61,4 @@ EXPOSE 8000 HEALTHCHECK --interval=30s --timeout=3s \ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" -CMD ["uvicorn", "receiptwitness.main:app", "--host", "0.0.0.0", "--port", "8000"] +CMD ["sh", "-c", "python -m alembic upgrade head && uvicorn receiptwitness.main:app --host 0.0.0.0 --port 8000"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..1ac7029 --- /dev/null +++ b/README.md @@ -0,0 +1,29 @@ +# ReceiptWitness + +Purchase data ingestion service for CartSnitch. Authenticates with grocery retailer web portals (Meijer, Kroger, Target) via Playwright, scrapes purchase history, and writes structured records to the shared PostgreSQL database. 
+ +## Quick Start + +```bash +# Install dependencies +pip install -e ".[dev]" + +# Run tests +pytest tests/ -v + +# Local dev with Docker Compose +docker-compose up +``` + +## Architecture + +- **Scrapers:** Playwright-based browser automation for each retailer +- **Parsers:** Converts raw receipt data to structured `Purchase` / `PurchaseItem` records +- **Database:** SQLAlchemy 2.0 async; models inlined under `src/receiptwitness/shared/` +- **Events:** Publishes `cartsnitch.receipts.ingested` to Redis after ingestion + +## Branches + +- `dev` — development, auto-deploys to dev cluster +- `uat` — user acceptance testing +- `main` — production, auto-deploys to prod cluster diff --git a/alembic.ini b/alembic.ini new file mode 100644 index 0000000..00a0b14 --- /dev/null +++ b/alembic.ini @@ -0,0 +1,36 @@ +[alembic] +script_location = alembic +sqlalchemy.url = postgresql://localhost:5432/cartsnitch + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/alembic/env.py b/alembic/env.py new file mode 100644 index 0000000..8768aac --- /dev/null +++ b/alembic/env.py @@ -0,0 +1,51 @@ +"""Alembic environment configuration for CartSnitch.""" + +import os +from logging.config import fileConfig + +from sqlalchemy import engine_from_config, pool + +from alembic import context +from receiptwitness.shared.models.base import Base + +config = context.config +if config.config_file_name is not None: + fileConfig(config.config_file_name) + +db_url = os.environ.get("CARTSNITCH_DATABASE_URL_SYNC") +if db_url: + 
config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%")) + +target_metadata = Base.metadata + + +def run_migrations_offline() -> None: + """Run migrations in 'offline' mode.""" + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, + target_metadata=target_metadata, + literal_binds=True, + dialect_opts={"paramstyle": "named"}, + ) + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online() -> None: + """Run migrations in 'online' mode.""" + connectable = engine_from_config( + config.get_section(config.config_ini_section, {}), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + with connectable.connect() as connection: + context.configure(connection=connection, target_metadata=target_metadata) + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/alembic/script.py.mako b/alembic/script.py.mako new file mode 100644 index 0000000..fe3b097 --- /dev/null +++ b/alembic/script.py.mako @@ -0,0 +1,25 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} +""" + +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op +${imports if imports else ""} + +revision: str = ${repr(up_revision)} +down_revision: Union[str, None] = ${repr(down_revision)} +branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} +depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} + + +def upgrade() -> None: + ${upgrades if upgrades else "pass"} + + +def downgrade() -> None: + ${downgrades if downgrades else "pass"} diff --git a/alembic/versions/001_add_email_inbound_token.py b/alembic/versions/001_add_email_inbound_token.py new file mode 100644 index 0000000..43a6fe8 --- /dev/null +++ b/alembic/versions/001_add_email_inbound_token.py @@ -0,0 +1,37 @@ +"""Add email_inbound_token to users. 
+ +Revision ID: 001_add_email_inbound_token +Revises: +Create Date: 2026-04-02 +""" + +from collections.abc import Sequence + +import sqlalchemy as sa + +from alembic import op + +revision: str = "001_add_email_inbound_token" +down_revision: str | None = None +branch_labels: str | Sequence[str] | None = None +depends_on: str | Sequence[str] | None = None + + +def upgrade() -> None: + op.add_column("users", sa.Column("email_inbound_token", sa.String(22), nullable=True)) + op.create_unique_constraint("uq_users_email_inbound_token", "users", ["email_inbound_token"]) + + # Backfill existing users with generated tokens (PostgreSQL) + op.execute( + "UPDATE users SET email_inbound_token = " + "substring(replace(gen_random_uuid()::text, '-', ''), 1, 22) " + "WHERE email_inbound_token IS NULL" + ) + + # Alter to non-nullable + op.alter_column("users", "email_inbound_token", nullable=False) + + +def downgrade() -> None: + op.drop_constraint("uq_users_email_inbound_token", "users", type_="unique") + op.drop_column("users", "email_inbound_token") \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a698913..7a9d9d1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,8 @@ version = "0.1.0" description = "CartSnitch receipt/purchase history ingestion service" requires-python = ">=3.12" dependencies = [ - "cartsnitch-common>=0.1.0", + "alembic>=1.13,<2.0", + "pydantic[email]>=2.0,<3.0", "playwright>=1.49,<2.0", "playwright-stealth>=1.0,<2.0", "cryptography>=46.0,<47.0", @@ -50,10 +51,6 @@ strict = false warn_return_any = true warn_unused_ignores = true -[[tool.mypy.overrides]] -module = "cartsnitch_common.*" -ignore_missing_imports = true - [tool.pytest.ini_options] asyncio_mode = "auto" testpaths = ["tests"] diff --git a/src/receiptwitness/events.py b/src/receiptwitness/events.py index a9e6204..25d6c8f 100644 --- a/src/receiptwitness/events.py +++ b/src/receiptwitness/events.py @@ -7,8 +7,8 @@ from datetime import UTC, datetime from decimal 
import Decimal import redis.asyncio as aioredis -from cartsnitch_common.database import get_async_session_factory -from cartsnitch_common.models.user import User +from receiptwitness.shared.database import get_async_session_factory +from receiptwitness.shared.models import User from sqlalchemy import select from receiptwitness.config import settings diff --git a/src/receiptwitness/pipeline/matching.py b/src/receiptwitness/pipeline/matching.py index 7e71039..882e0ea 100644 --- a/src/receiptwitness/pipeline/matching.py +++ b/src/receiptwitness/pipeline/matching.py @@ -7,9 +7,9 @@ and batch matching for purchase ingestion. import uuid from dataclasses import dataclass -from cartsnitch_common.constants import MatchConfidence -from cartsnitch_common.models.product import NormalizedProduct -from cartsnitch_common.schemas.purchase import PurchaseItemCreate +from receiptwitness.shared.constants import MatchConfidence +from receiptwitness.shared.models import NormalizedProduct +from receiptwitness.shared.schemas import PurchaseItemCreate from sqlalchemy.orm import Session from receiptwitness.pipeline.normalization import ( diff --git a/src/receiptwitness/pipeline/normalization.py b/src/receiptwitness/pipeline/normalization.py index a714020..9b9e2b7 100644 --- a/src/receiptwitness/pipeline/normalization.py +++ b/src/receiptwitness/pipeline/normalization.py @@ -10,7 +10,7 @@ import re from dataclasses import dataclass from enum import StrEnum -from cartsnitch_common.models.product import NormalizedProduct +from receiptwitness.shared.models import NormalizedProduct from sqlalchemy import cast, func, select, String from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Session diff --git a/src/receiptwitness/pipeline/receipt.py b/src/receiptwitness/pipeline/receipt.py index 7d3e863..4dbaf91 100644 --- a/src/receiptwitness/pipeline/receipt.py +++ b/src/receiptwitness/pipeline/receipt.py @@ -7,7 +7,7 @@ import re from datetime import date from decimal import 
Decimal, InvalidOperation -from cartsnitch_common.schemas.purchase import PurchaseCreate, PurchaseItemCreate +from receiptwitness.shared.schemas import PurchaseCreate, PurchaseItemCreate def _clean_product_name(raw: str) -> str: diff --git a/src/receiptwitness/shared/__init__.py b/src/receiptwitness/shared/__init__.py new file mode 100644 index 0000000..b2472e8 --- /dev/null +++ b/src/receiptwitness/shared/__init__.py @@ -0,0 +1,5 @@ +"""ReceiptWitness shared package — inlined from cartsnitch-common. + +This package contains the subset of cartsnitch-common needed by ReceiptWitness, +extracted and made self-contained so receiptwitness has no cross-repo dependency. +""" diff --git a/src/receiptwitness/shared/config.py b/src/receiptwitness/shared/config.py new file mode 100644 index 0000000..70b4153 --- /dev/null +++ b/src/receiptwitness/shared/config.py @@ -0,0 +1,18 @@ +"""Shared configuration for CartSnitch services via pydantic-settings.""" + +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """Environment-driven settings shared by all CartSnitch services.""" + + model_config = SettingsConfigDict(env_prefix="CARTSNITCH_", env_file=".env") + + database_url: str = "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch" + database_url_sync: str = "postgresql+psycopg2://cartsnitch:cartsnitch@localhost:5432/cartsnitch" + redis_url: str = "redis://localhost:6379/0" + debug: bool = False + log_level: str = "INFO" + + +settings = Settings() diff --git a/src/receiptwitness/shared/constants.py b/src/receiptwitness/shared/constants.py new file mode 100644 index 0000000..b7a716c --- /dev/null +++ b/src/receiptwitness/shared/constants.py @@ -0,0 +1,85 @@ +"""Constants and enums shared across CartSnitch services.""" + +from enum import StrEnum + + +class StoreSlug(StrEnum): + """Supported retailer slugs.""" + + MEIJER = "meijer" + KROGER = "kroger" + TARGET = "target" + + +class AccountStatus(StrEnum): + """User 
store account link status.""" + + ACTIVE = "active" + EXPIRED = "expired" + ERROR = "error" + + +class DiscountType(StrEnum): + """Coupon discount type.""" + + PERCENT = "percent" + FIXED = "fixed" + BOGO = "bogo" + BUY_X_GET_Y = "buy_x_get_y" + + +class PriceSource(StrEnum): + """Source of a price observation.""" + + RECEIPT = "receipt" + CATALOG = "catalog" + WEEKLY_AD = "weekly_ad" + + +class EventType(StrEnum): + """Redis pub/sub event types.""" + + RECEIPTS_INGESTED = "cartsnitch.receipts.ingested" + PRICES_UPDATED = "cartsnitch.prices.updated" + PRODUCTS_NORMALIZED = "cartsnitch.products.normalized" + COUPONS_UPDATED = "cartsnitch.coupons.updated" + ALERT_PRICE_INCREASE = "cartsnitch.alerts.price_increase" + ALERT_SHRINKFLATION = "cartsnitch.alerts.shrinkflation" + + +class ProductCategory(StrEnum): + """Top-level product categories.""" + + PRODUCE = "produce" + DAIRY = "dairy" + MEAT = "meat" + BAKERY = "bakery" + FROZEN = "frozen" + PANTRY = "pantry" + BEVERAGES = "beverages" + SNACKS = "snacks" + HOUSEHOLD = "household" + PERSONAL_CARE = "personal_care" + OTHER = "other" + + +class MatchConfidence(StrEnum): + """Confidence level for product matching.""" + + HIGH = "high" + MEDIUM = "medium" + LOW = "low" + + +class SizeUnit(StrEnum): + """Standardized product size units.""" + + OZ = "oz" + FL_OZ = "fl_oz" + LB = "lb" + G = "g" + KG = "kg" + ML = "ml" + L = "l" + CT = "ct" + PK = "pk" diff --git a/src/receiptwitness/shared/database.py b/src/receiptwitness/shared/database.py new file mode 100644 index 0000000..21cca5b --- /dev/null +++ b/src/receiptwitness/shared/database.py @@ -0,0 +1,45 @@ +"""Database engine and session factories for sync and async usage.""" + +from collections.abc import AsyncGenerator, Generator + +from sqlalchemy import create_engine +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.orm import Session, sessionmaker + +from receiptwitness.shared.config import settings + + +def 
get_async_engine(url: str | None = None): + """Create an async SQLAlchemy engine.""" + return create_async_engine(url or settings.database_url, echo=settings.debug) + + +def get_sync_engine(url: str | None = None): + """Create a sync SQLAlchemy engine.""" + return create_engine(url or settings.database_url_sync, echo=settings.debug) + + +def get_async_session_factory(url: str | None = None) -> async_sessionmaker[AsyncSession]: + """Create an async session factory.""" + engine = get_async_engine(url) + return async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + +def get_sync_session_factory(url: str | None = None) -> sessionmaker[Session]: + """Create a sync session factory.""" + engine = get_sync_engine(url) + return sessionmaker(engine, expire_on_commit=False) + + +async def get_async_session(url: str | None = None) -> AsyncGenerator[AsyncSession, None]: + """Dependency for async session injection.""" + factory = get_async_session_factory(url) + async with factory() as session: + yield session + + +def get_sync_session(url: str | None = None) -> Generator[Session, None, None]: + """Dependency for sync session injection.""" + factory = get_sync_session_factory(url) + with factory() as session: + yield session diff --git a/src/receiptwitness/shared/models/__init__.py b/src/receiptwitness/shared/models/__init__.py new file mode 100644 index 0000000..6c9afc7 --- /dev/null +++ b/src/receiptwitness/shared/models/__init__.py @@ -0,0 +1,23 @@ +"""ReceiptWitness ORM models — inlined from cartsnitch-common.""" + +from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin +from receiptwitness.shared.models.product import NormalizedProduct +from receiptwitness.shared.models.user import User, UserStoreAccount + +# Stub models — needed for relationship resolution but not directly used by receiptwitness. +# Full definitions live in cartsnitch/common. 
+from receiptwitness.shared.models.stub_store import Store, StoreLocation +from receiptwitness.shared.models.stub_purchase import Purchase, PurchaseItem + +__all__ = [ + "Base", + "TimestampMixin", + "UUIDPrimaryKeyMixin", + "NormalizedProduct", + "Purchase", + "PurchaseItem", + "Store", + "StoreLocation", + "User", + "UserStoreAccount", +] diff --git a/src/receiptwitness/shared/models/base.py b/src/receiptwitness/shared/models/base.py new file mode 100644 index 0000000..806f128 --- /dev/null +++ b/src/receiptwitness/shared/models/base.py @@ -0,0 +1,30 @@ +"""Base model and mixins for all ReceiptWitness ORM models.""" + +import uuid +from datetime import datetime + +from sqlalchemy import DateTime, func +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + """Base class for all ReceiptWitness models.""" + + +class TimestampMixin: + """Mixin providing created_at / updated_at columns.""" + + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), nullable=False + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), server_default=func.now(), onupdate=func.now(), nullable=False + ) + + +class UUIDPrimaryKeyMixin: + """Mixin providing a UUID primary key.""" + + id: Mapped[uuid.UUID] = mapped_column( + primary_key=True, default=uuid.uuid4, server_default=func.gen_random_uuid() + ) diff --git a/src/receiptwitness/shared/models/product.py b/src/receiptwitness/shared/models/product.py new file mode 100644 index 0000000..f430914 --- /dev/null +++ b/src/receiptwitness/shared/models/product.py @@ -0,0 +1,26 @@ +"""NormalizedProduct model — the canonical product identity.""" + +from typing import TYPE_CHECKING + +from sqlalchemy import JSON, String +from sqlalchemy.orm import Mapped, mapped_column + +from receiptwitness.shared.constants import ProductCategory, SizeUnit +from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin + +if 
TYPE_CHECKING: + pass + + +class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Canonical product identity — matches products across retailers.""" + + __tablename__ = "normalized_products" + + canonical_name: Mapped[str] = mapped_column(String(300), nullable=False) + category: Mapped[ProductCategory | None] = mapped_column(String(50)) + subcategory: Mapped[str | None] = mapped_column(String(100)) + brand: Mapped[str | None] = mapped_column(String(200)) + size: Mapped[str | None] = mapped_column(String(50)) + size_unit: Mapped[SizeUnit | None] = mapped_column(String(10)) + upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list) diff --git a/src/receiptwitness/shared/models/stub_purchase.py b/src/receiptwitness/shared/models/stub_purchase.py new file mode 100644 index 0000000..2822291 --- /dev/null +++ b/src/receiptwitness/shared/models/stub_purchase.py @@ -0,0 +1,64 @@ +"""Stub Purchase and PurchaseItem models. + +These are minimal stubs of the full cartsnitch-common Purchase/PurchaseItem models. +They exist solely to satisfy SQLAlchemy relationship resolution for User and +UserStoreAccount. The canonical definitions live in cartsnitch/common. +""" + +import uuid +from datetime import date, datetime +from decimal import Decimal + +from sqlalchemy import JSON, Date, DateTime, ForeignKey, Index, Numeric, String, UniqueConstraint, func +from sqlalchemy.orm import Mapped, mapped_column + +from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin + + +class Purchase(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Stub: a shopping trip/receipt. 
Full definition in cartsnitch/common.""" + + __tablename__ = "purchases" + + user_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False) + store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False) + store_location_id: Mapped[uuid.UUID | None] = mapped_column(ForeignKey("store_locations.id")) + receipt_id: Mapped[str] = mapped_column(String(200), nullable=False) + purchase_date: Mapped[date] = mapped_column(Date, nullable=False) + total: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False) + subtotal: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + tax: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + savings_total: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + source_url: Mapped[str | None] = mapped_column(String(500)) + raw_data: Mapped[dict | None] = mapped_column(JSON) + ingested_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + server_default=func.now(), + nullable=False, + ) + + __table_args__ = ( + Index("ix_purchases_user_store", "user_id", "store_id"), + UniqueConstraint("user_id", "store_id", "receipt_id", name="uq_purchase_receipt"), + ) + + +class PurchaseItem(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Stub: a line item on a receipt. 
Full definition in cartsnitch/common.""" + + __tablename__ = "purchase_items" + + purchase_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("purchases.id"), nullable=False) + product_name_raw: Mapped[str] = mapped_column(String(300), nullable=False) + upc: Mapped[str | None] = mapped_column(String(20)) + quantity: Mapped[Decimal] = mapped_column(Numeric(10, 3), nullable=False, default=1) + unit_price: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False) + extended_price: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False) + regular_price: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + sale_price: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + coupon_discount: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + loyalty_discount: Mapped[Decimal | None] = mapped_column(Numeric(10, 2)) + category_raw: Mapped[str | None] = mapped_column(String(100)) + normalized_product_id: Mapped[uuid.UUID | None] = mapped_column( + ForeignKey("normalized_products.id") + ) diff --git a/src/receiptwitness/shared/models/stub_store.py b/src/receiptwitness/shared/models/stub_store.py new file mode 100644 index 0000000..e039d36 --- /dev/null +++ b/src/receiptwitness/shared/models/stub_store.py @@ -0,0 +1,46 @@ +"""Stub Store and StoreLocation models. + +These are minimal stubs of the full cartsnitch-common Store/StoreLocation models. +They exist solely to satisfy SQLAlchemy relationship resolution for User and +UserStoreAccount. The canonical definitions live in cartsnitch/common. +""" + +import uuid +from typing import TYPE_CHECKING + +from sqlalchemy import Float, ForeignKey, String +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from receiptwitness.shared.constants import StoreSlug +from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin + +if TYPE_CHECKING: + from receiptwitness.shared.models.user import UserStoreAccount + + +class Store(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Stub: canonical retailer. 
Full definition in cartsnitch/common.""" + + __tablename__ = "stores" + + name: Mapped[str] = mapped_column(String(100), nullable=False) + slug: Mapped[StoreSlug] = mapped_column(String(20), nullable=False, unique=True) + logo_url: Mapped[str | None] = mapped_column(String(500)) + website_url: Mapped[str | None] = mapped_column(String(500)) + + # Relationships: reciprocal side of UserStoreAccount.store (back_populates needs both ends). + user_accounts: Mapped[list["UserStoreAccount"]] = relationship(back_populates="store") + + +class StoreLocation(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Stub: physical store location. Full definition in cartsnitch/common.""" + + __tablename__ = "store_locations" + + store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False) + address: Mapped[str] = mapped_column(String(300), nullable=False) + city: Mapped[str] = mapped_column(String(100), nullable=False) + state: Mapped[str] = mapped_column(String(2), nullable=False) + zip: Mapped[str] = mapped_column(String(10), nullable=False) + lat: Mapped[float | None] = mapped_column(Float) + lng: Mapped[float | None] = mapped_column(Float) diff --git a/src/receiptwitness/shared/models/user.py b/src/receiptwitness/shared/models/user.py new file mode 100644 index 0000000..b458561 --- /dev/null +++ b/src/receiptwitness/shared/models/user.py @@ -0,0 +1,63 @@ +"""User and UserStoreAccount models.""" + +import secrets +import uuid +from datetime import datetime +from typing import TYPE_CHECKING + +from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint, text +from sqlalchemy.orm import Mapped, mapped_column, relationship + +from receiptwitness.shared.constants import AccountStatus +from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin + +if TYPE_CHECKING: + from receiptwitness.shared.models.stub_purchase import Purchase + from receiptwitness.shared.models.stub_store import Store + + +class User(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Application user.""" + + __tablename__ = "users" + + email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True) + 
email_inbound_token: Mapped[str] = mapped_column( + String(22), + nullable=False, + unique=True, + default=lambda: secrets.token_urlsafe(16), + server_default=text( + "replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_')" + ), + ) + hashed_password: Mapped[str | None] = mapped_column(String(255), nullable=True) + display_name: Mapped[str | None] = mapped_column(String(100)) + email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="false") + image: Mapped[str | None] = mapped_column(Text, nullable=True) + + # Relationships + store_accounts: Mapped[list["UserStoreAccount"]] = relationship(back_populates="user") + purchases: Mapped[list["Purchase"]] = relationship(back_populates="user") + + +class UserStoreAccount(UUIDPrimaryKeyMixin, TimestampMixin, Base): + """Link between a user and their retailer account credentials.""" + + __tablename__ = "user_store_accounts" + __table_args__ = (UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),) + + user_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False) + store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False) + # WARNING: Contains retailer session cookies/tokens. Encryption-at-rest + # required before production deployment (e.g., pgcrypto or app-level encryption). 
+ session_data: Mapped[dict | None] = mapped_column(JSON) + session_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + status: Mapped[AccountStatus] = mapped_column( + String(20), nullable=False, default=AccountStatus.ACTIVE + ) + + # Relationships + user: Mapped["User"] = relationship(back_populates="store_accounts") + store: Mapped["Store"] = relationship(back_populates="user_accounts") diff --git a/src/receiptwitness/shared/schemas/__init__.py b/src/receiptwitness/shared/schemas/__init__.py new file mode 100644 index 0000000..f20de0e --- /dev/null +++ b/src/receiptwitness/shared/schemas/__init__.py @@ -0,0 +1,5 @@ +"""ReceiptWitness Pydantic schemas — inlined from cartsnitch-common.""" + +from receiptwitness.shared.schemas.purchase import PurchaseCreate, PurchaseItemCreate + +__all__ = ["PurchaseCreate", "PurchaseItemCreate"] diff --git a/src/receiptwitness/shared/schemas/purchase.py b/src/receiptwitness/shared/schemas/purchase.py new file mode 100644 index 0000000..05959be --- /dev/null +++ b/src/receiptwitness/shared/schemas/purchase.py @@ -0,0 +1,73 @@ +"""Purchase and PurchaseItem Pydantic schemas.""" + +import uuid +from datetime import date, datetime +from decimal import Decimal + +from pydantic import BaseModel + + +class PurchaseItemCreate(BaseModel): + product_name_raw: str + upc: str | None = None + quantity: Decimal = Decimal("1") + unit_price: Decimal + extended_price: Decimal + regular_price: Decimal | None = None + sale_price: Decimal | None = None + coupon_discount: Decimal | None = None + loyalty_discount: Decimal | None = None + category_raw: str | None = None + normalized_product_id: uuid.UUID | None = None + + +class PurchaseItemRead(BaseModel): + model_config = {"from_attributes": True} + + id: uuid.UUID + purchase_id: uuid.UUID + product_name_raw: str + upc: str | None + quantity: Decimal + unit_price: Decimal + extended_price: 
Decimal + regular_price: Decimal | None + sale_price: Decimal | None + coupon_discount: Decimal | None + loyalty_discount: Decimal | None + category_raw: str | None + normalized_product_id: uuid.UUID | None + + +class PurchaseCreate(BaseModel): + user_id: uuid.UUID + store_id: uuid.UUID + store_location_id: uuid.UUID | None = None + receipt_id: str + purchase_date: date + total: Decimal + subtotal: Decimal | None = None + tax: Decimal | None = None + savings_total: Decimal | None = None + source_url: str | None = None + raw_data: dict | None = None + items: list[PurchaseItemCreate] = [] + + +class PurchaseRead(BaseModel): + model_config = {"from_attributes": True} + + id: uuid.UUID + user_id: uuid.UUID + store_id: uuid.UUID + store_location_id: uuid.UUID | None + receipt_id: str + purchase_date: date + total: Decimal + subtotal: Decimal | None + tax: Decimal | None + savings_total: Decimal | None + source_url: str | None + ingested_at: datetime + created_at: datetime + updated_at: datetime diff --git a/src/receiptwitness/worker/email_worker.py b/src/receiptwitness/worker/email_worker.py index 52a5dc0..1688f1d 100644 --- a/src/receiptwitness/worker/email_worker.py +++ b/src/receiptwitness/worker/email_worker.py @@ -3,8 +3,8 @@ import asyncio import logging -from cartsnitch_common.database import get_async_session_factory -from cartsnitch_common.models.user import User +from receiptwitness.shared.database import get_async_session_factory +from receiptwitness.shared.models import User from sqlalchemy import select from receiptwitness.config import settings diff --git a/tests/test_pipeline/conftest.py b/tests/test_pipeline/conftest.py index 693366f..98b90ab 100644 --- a/tests/test_pipeline/conftest.py +++ b/tests/test_pipeline/conftest.py @@ -1,7 +1,7 @@ """Shared test fixtures for pipeline tests.""" import pytest -from cartsnitch_common.models.base import Base +from receiptwitness.shared.models import Base from sqlalchemy import create_engine from sqlalchemy.orm 
import sessionmaker diff --git a/tests/test_pipeline/test_matching.py b/tests/test_pipeline/test_matching.py index 408153c..310396b 100644 --- a/tests/test_pipeline/test_matching.py +++ b/tests/test_pipeline/test_matching.py @@ -4,9 +4,9 @@ import uuid from datetime import UTC, datetime from decimal import Decimal -from cartsnitch_common.constants import MatchConfidence -from cartsnitch_common.models.product import NormalizedProduct -from cartsnitch_common.schemas.purchase import PurchaseItemCreate +from receiptwitness.shared.constants import MatchConfidence +from receiptwitness.shared.models import NormalizedProduct +from receiptwitness.shared.schemas import PurchaseItemCreate from receiptwitness.pipeline.matching import ( ProductMatcher, diff --git a/tests/test_pipeline/test_normalization.py b/tests/test_pipeline/test_normalization.py index de1d566..67a7f99 100644 --- a/tests/test_pipeline/test_normalization.py +++ b/tests/test_pipeline/test_normalization.py @@ -3,7 +3,7 @@ import uuid from datetime import UTC, datetime -from cartsnitch_common.models.product import NormalizedProduct +from receiptwitness.shared.models import NormalizedProduct from receiptwitness.pipeline.normalization import ( MatchMethod,