feat: migrate receiptwitness to standalone repo with inlined common

Extract receiptwitness/ from the monorepo into cartsnitch/receiptwitness.
Inline the consumed modules from cartsnitch-common so there is no
cross-repo dependency.

- Add src/receiptwitness/shared/ with inlined models, schemas, constants, database
- Update all imports from cartsnitch_common to receiptwitness.shared
- Remove cartsnitch-common dependency from pyproject.toml
- Copy and update Alembic config (alembic.ini, alembic/)
- Update Dockerfile for standalone build context, add migration CMD
- Add CI workflow with lint, test, build, grype scan, deploy-dev, deploy-uat
- Add .grype.yaml

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Barcode Betty
2026-04-19 12:18:11 +00:00
parent bf7cabc9d8
commit f47da487da
30 changed files with 898 additions and 33 deletions
+212
View File
@@ -0,0 +1,212 @@
# CI pipeline for ReceiptWitness: lint + test on every push/PR, then (push only)
# build and push the image, scan it, and bump the image tag in cartsnitch/infra.
name: CI

on:
  push:
    branches: [main, dev, uat]
  pull_request:
    branches: [main, dev, uat]

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

# contents: write is needed to push the CalVer git tag; packages: write to push to GHCR.
permissions:
  contents: write
  packages: write

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: cartsnitch/receiptwitness

jobs:
  lint:
    runs-on: runners-cartsnitch
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: pip install ruff
      - name: Lint
        run: ruff check src/ tests/

  test:
    runs-on: runners-cartsnitch
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
      - name: Install dependencies
        run: pip install -e ".[dev]"
      - name: Run tests
        run: pytest tests/ -v

  build-and-push:
    runs-on: runners-cartsnitch
    if: github.event_name == 'push'
    needs: [lint, test]
    outputs:
      # calver_tag is only populated for pushes to main (see the CalVer step).
      calver_tag: ${{ steps.calver.outputs.version }}
      sha_tag: sha-${{ github.sha }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so existing v<date> tags are visible to `git tag -l`.
          fetch-depth: 0
      - name: Generate CalVer tag
        id: calver
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        run: |
          # First build of the day -> YYYY.MM.DD, second -> YYYY.MM.DD.2,
          # later builds increment the trailing build number.
          DATE_TAG=$(date -u +%Y.%m.%d)
          EXISTING=$(git tag -l "v${DATE_TAG}*" | sort -V | tail -1)
          if [ -z "$EXISTING" ]; then
            VERSION="$DATE_TAG"
          elif [ "$EXISTING" = "v${DATE_TAG}" ]; then
            VERSION="${DATE_TAG}.2"
          else
            BUILD_NUM=$(echo "$EXISTING" | sed "s/v${DATE_TAG}\.//")
            VERSION="${DATE_TAG}.$((BUILD_NUM + 1))"
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "CalVer tag: $VERSION"
      - name: Log in to GHCR
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # Every push gets sha-<full sha>; main additionally gets the CalVer tag and latest.
          tags: |
            type=sha,prefix=sha-,format=long
            type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          push: ${{ github.event_name == 'push' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # Changes on every run so the Dockerfile's apt layer is rebuilt.
          build-args: APT_CACHE_BUST=${{ github.run_id }}
      - name: Create git tag
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        run: |
          git tag "v${{ steps.calver.outputs.version }}"
          git push origin "v${{ steps.calver.outputs.version }}"

  grype:
    runs-on: runners-cartsnitch
    needs: [build-and-push]
    if: github.event_name == 'push'
    steps:
      # Checkout puts .grype.yaml in the working directory, where Grype
      # discovers it automatically as its configuration (ignore rules).
      - uses: actions/checkout@v4
      - name: Generate SBOM
        uses: anchore/sbom-action@v0
        with:
          image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }}
          format: spdx-json
          output-file: sbom.spdx.json
      - name: Upload SBOM
        uses: actions/upload-artifact@v4
        with:
          name: sbom
          path: sbom.spdx.json
      - name: Run Grype
        # anchore/scan-action is the published Grype action; it takes
        # fail-build / severity-cutoff rather than fail-on / ignore-file.
        uses: anchore/scan-action@v6
        with:
          sbom: sbom.spdx.json
          fail-build: true
          severity-cutoff: high
          output-format: table

  deploy-dev:
    runs-on: runners-cartsnitch
    # build-and-push is a direct dependency so its result can gate the deploy.
    needs: [build-and-push, grype]
    # NOTE(review): always() keeps the Grype scan advisory (a failed scan does not
    # block the deploy, as before) — confirm that is intended. The image must
    # exist, though, so a failed or skipped build now blocks the deploy.
    if: >-
      always() && !cancelled()
      && needs.build-and-push.result == 'success'
      && github.event_name == 'push'
      && github.ref == 'refs/heads/dev'
    steps:
      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@v1
        with:
          app-id: ${{ secrets.CARTSNITCH_APP_ID }}
          private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }}
          owner: ${{ github.repository_owner }}
          repositories: infra
      - name: Checkout infra repo
        uses: actions/checkout@v4
        with:
          repository: cartsnitch/infra
          token: ${{ steps.app-token.outputs.token }}
          ref: main
          path: infra
      - name: Install kubectl
        uses: azure/setup-kubectl@v4
      - name: Install kustomize
        uses: imranismail/setup-kustomize@v2
      - name: Update receiptwitness image tag
        run: |
          cd infra/apps/overlays/dev
          kustomize edit set image ghcr.io/cartsnitch/receiptwitness:sha-${{ github.sha }}
      - name: Commit and push to infra
        run: |
          cd infra
          git config user.name "cartsnitch-ci[bot]"
          git config user.email "cartsnitch-ci[bot]@users.noreply.github.com"
          git add apps/overlays/dev/kustomization.yaml
          # A re-run for the same commit leaves nothing staged; `git commit` would fail.
          if git diff --cached --quiet; then
            echo "Image tag already up to date; nothing to commit."
            exit 0
          fi
          git commit -m "ci(dev): update receiptwitness to sha-${{ github.sha }}"
          git pull --rebase origin main
          git push origin main

  deploy-uat:
    runs-on: runners-cartsnitch
    needs: [build-and-push, grype]
    # Same gating as deploy-dev: scan is advisory, a successful build is required.
    if: >-
      always() && !cancelled()
      && needs.build-and-push.result == 'success'
      && github.event_name == 'push'
      && github.ref == 'refs/heads/uat'
    steps:
      - name: Generate GitHub App token
        id: app-token
        uses: actions/create-github-app-token@v1
        with:
          app-id: ${{ secrets.CARTSNITCH_APP_ID }}
          private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }}
          owner: ${{ github.repository_owner }}
          repositories: infra
      - name: Checkout infra repo
        uses: actions/checkout@v4
        with:
          repository: cartsnitch/infra
          token: ${{ steps.app-token.outputs.token }}
          ref: main
          path: infra
      - name: Install kubectl
        uses: azure/setup-kubectl@v4
      - name: Install kustomize
        uses: imranismail/setup-kustomize@v2
      - name: Update receiptwitness image tag
        run: |
          cd infra/apps/overlays/uat
          # The CalVer tag is only generated for pushes to main, so it is always
          # empty on the uat branch; the sha tag is the only tag pushed here.
          kustomize edit set image ghcr.io/cartsnitch/receiptwitness:sha-${{ github.sha }}
      - name: Commit and push to infra
        run: |
          cd infra
          git config user.name "cartsnitch-ci[bot]"
          git config user.email "cartsnitch-ci[bot]@users.noreply.github.com"
          git add apps/overlays/uat/kustomization.yaml
          # A re-run for the same commit leaves nothing staged; `git commit` would fail.
          if git diff --cached --quiet; then
            echo "Image tag already up to date; nothing to commit."
            exit 0
          fi
          git commit -m "ci(uat): update receiptwitness to sha-${{ github.sha }}"
          git pull --rebase origin main
          git push origin main
+4
View File
@@ -0,0 +1,4 @@
# Grype configuration: vulnerability matches listed under `ignore` are
# excluded from scan results. Each entry should carry a justification.
ignore:
# Python 3.12 CVEs — only fixed in 3.13+, cannot upgrade major version safely
# TODO(review): re-evaluate these entries whenever the base image's Python version changes.
- vulnerability: CVE-2025-13836
- vulnerability: CVE-2026-4519
+3 -3
View File
@@ -11,7 +11,7 @@ CartSnitch is a self-hosted grocery price intelligence platform built as a polyr
| Repo | Service | Purpose | | Repo | Service | Purpose |
|------|---------|---------| |------|---------|---------|
| `cartsnitch/common` | — | Shared models, schemas, utilities | | `cartsnitch/common` | — | Shared models, schemas, utilities (extracted into individual service repos) |
| `cartsnitch/receiptwitness` | ReceiptWitness | Purchase data ingestion via retailer scrapers (this repo) | | `cartsnitch/receiptwitness` | ReceiptWitness | Purchase data ingestion via retailer scrapers (this repo) |
| `cartsnitch/api` | API Gateway | Frontend-facing REST API | | `cartsnitch/api` | API Gateway | Frontend-facing REST API |
| `cartsnitch/cartsnitch` | Frontend | React PWA (mobile-first) | | `cartsnitch/cartsnitch` | Frontend | React PWA (mobile-first) |
@@ -23,7 +23,7 @@ CartSnitch is a self-hosted grocery price intelligence platform built as a polyr
### Architecture Decisions ### Architecture Decisions
- **Polyrepo:** Each service has its own repo, Dockerfile, CI/CD pipeline. - **Polyrepo:** Each service has its own repo, Dockerfile, CI/CD pipeline.
- **Shared DB:** One PostgreSQL cluster. This service writes to `purchases`, `purchase_items`, `price_history` tables. Models come from `cartsnitch-common`. - **Shared DB:** One PostgreSQL cluster. This service writes to `purchases`, `purchase_items`, `price_history` tables. Models are inlined under `src/receiptwitness/shared/` (extracted from `cartsnitch-common` during the CAR-724 migration).
- **Inter-service comms:** REST (synchronous) + Redis pub/sub (async events). - **Inter-service comms:** REST (synchronous) + Redis pub/sub (async events).
- **Target scale:** 500–1,000 users. Each user has their own authenticated sessions to up to 3 retailers. - **Target scale:** 500–1,000 users. Each user has their own authenticated sessions to up to 3 retailers.
@@ -60,7 +60,7 @@ ReceiptWitness authenticates with grocery retailer web portals using per-user se
- Python 3.12+ - Python 3.12+
- Playwright (Python async API) for headless browser automation - Playwright (Python async API) for headless browser automation
- FastAPI (lightweight internal API for triggering scrapes, health checks, status) - FastAPI (lightweight internal API for triggering scrapes, health checks, status)
- SQLAlchemy 2.0 (via `cartsnitch-common`) - SQLAlchemy 2.0 (models inlined under `src/receiptwitness/shared/`)
- Redis (pub/sub event publishing) - Redis (pub/sub event publishing)
- APScheduler or Celery (for scheduled scraping jobs) - APScheduler or Celery (for scheduled scraping jobs)
- cryptography / Fernet (encrypting stored session data) - cryptography / Fernet (encrypting stored session data)
+9 -11
View File
@@ -3,22 +3,18 @@ FROM python:3.12-slim AS build
WORKDIR /app WORKDIR /app
# build-essential and libpq-dev are needed to compile any C-extension wheels
# (e.g. psycopg2 fallback). No git needed — common/ is copied from the repo root.
ARG APT_CACHE_BUST=1 ARG APT_CACHE_BUST=1
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
libpq-dev \ libpq-dev \
build-essential \ build-essential \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# Build context is the repo root. These paths are relative to the root. # Build context is the receiptwitness repo root.
COPY receiptwitness/pyproject.toml ./ COPY pyproject.toml ./
COPY receiptwitness/src/ ./src/ COPY src/ ./src/
COPY common/ ./common/
# Install from the local common/ (cartsnitch-common>=0.1.0 in pyproject.toml # Install receiptwitness (shared modules are inlined under src/receiptwitness/shared/).
# will be satisfied by the local package) then install receiptwitness itself. RUN pip install --no-cache-dir --prefix=/install .
RUN pip install --no-cache-dir --prefix=/install ./common/ .
# Stage 2: Production image with Playwright + Chromium # Stage 2: Production image with Playwright + Chromium
FROM python:3.12-slim AS prod FROM python:3.12-slim AS prod
@@ -50,7 +46,9 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
RUN adduser --system --group --uid 1000 app RUN adduser --system --group --uid 1000 app
COPY --from=build /install /usr/local COPY --from=build /install /usr/local
COPY receiptwitness/src/ ./src/ COPY src/ ./src/
COPY alembic.ini ./
COPY alembic/ ./alembic/
# Install Playwright Chromium browser (runs as root; /opt/playwright is world-readable) # Install Playwright Chromium browser (runs as root; /opt/playwright is world-readable)
RUN PLAYWRIGHT_BROWSERS_PATH=/opt/playwright playwright install chromium RUN PLAYWRIGHT_BROWSERS_PATH=/opt/playwright playwright install chromium
@@ -63,4 +61,4 @@ EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=3s \ HEALTHCHECK --interval=30s --timeout=3s \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
CMD ["uvicorn", "receiptwitness.main:app", "--host", "0.0.0.0", "--port", "8000"] CMD ["sh", "-c", "python -m alembic upgrade head && uvicorn receiptwitness.main:app --host 0.0.0.0 --port 8000"]
+29
View File
@@ -0,0 +1,29 @@
# ReceiptWitness
Purchase data ingestion service for CartSnitch. Authenticates with grocery retailer web portals (Meijer, Kroger, Target) via Playwright, scrapes purchase history, and writes structured records to the shared PostgreSQL database.
## Quick Start
```bash
# Install dependencies
pip install -e ".[dev]"
# Run tests
pytest tests/ -v
# Local dev with Docker Compose
docker-compose up
```
## Architecture
- **Scrapers:** Playwright-based browser automation for each retailer
- **Parsers:** Converts raw receipt data to structured `Purchase` / `PurchaseItem` records
- **Database:** SQLAlchemy 2.0 async; models inlined under `src/receiptwitness/shared/`
- **Events:** Publishes `cartsnitch.receipts.ingested` to Redis after ingestion
## Branches
- `dev` — development, auto-deploys to dev cluster
- `uat` — user acceptance testing, auto-deploys to the UAT environment
- `main` — production, auto-deploys to prod cluster
+36
View File
@@ -0,0 +1,36 @@
# Alembic configuration for the ReceiptWitness migration environment.
[alembic]
# Path to the migration environment directory (the one containing env.py).
script_location = alembic
# Fallback database URL. alembic/env.py overrides this with the value of
# CARTSNITCH_DATABASE_URL_SYNC whenever that environment variable is set.
sqlalchemy.url = postgresql://localhost:5432/cartsnitch
# --- Logging configuration, loaded by env.py via logging.config.fileConfig ---
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
# Root logger: warnings and above go to the console handler.
[logger_root]
level = WARN
handlers = console
# No handler of its own; records propagate to the root logger.
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
# Alembic logs at INFO; no handler of its own, records propagate to root.
[logger_alembic]
level = INFO
handlers =
qualname = alembic
# Console handler writes to stderr.
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
+51
View File
@@ -0,0 +1,51 @@
"""Alembic environment configuration for CartSnitch."""
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
from receiptwitness.shared.models.base import Base
# Alembic Config object giving access to the values in alembic.ini.
config = context.config

# Metadata of the inlined ORM models; handed to Alembic in both run modes.
target_metadata = Base.metadata

# Apply the logging sections of the ini file, when one is in use.
_ini_path = config.config_file_name
if _ini_path is not None:
    fileConfig(_ini_path)

# An explicit environment variable takes precedence over the ini file URL.
db_url = os.environ.get("CARTSNITCH_DATABASE_URL_SYNC")
if db_url:
    # Double every "%" so it is not read as ini-style interpolation syntax.
    _escaped_url = db_url.replace("%", "%%")
    config.set_main_option("sqlalchemy.url", _escaped_url)
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    Configures the Alembic context from the configured URL only (no engine or
    connection is created here) and runs the migrations inside
    ``context.begin_transaction()``.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an engine from the ``sqlalchemy.``-prefixed options of the main ini
    section, opens a connection and runs the migrations over it.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    # NullPool: connections are not pooled for this engine.
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as conn:
        context.configure(connection=conn, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
# Pick the runner matching the mode Alembic was invoked in, then execute it.
_run_migrations = (
    run_migrations_offline if context.is_offline_mode() else run_migrations_online
)
_run_migrations()
+25
View File
@@ -0,0 +1,25 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
${imports if imports else ""}
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}
@@ -0,0 +1,37 @@
"""Add email_inbound_token to users.
Revision ID: 001_add_email_inbound_token
Revises:
Create Date: 2026-04-02
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "001_add_email_inbound_token"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add ``users.email_inbound_token`` as a unique, non-nullable column.

    The column is created nullable, existing rows are backfilled, and only
    then is the NOT NULL constraint applied.
    """
    table = "users"
    column = "email_inbound_token"

    op.add_column(table, sa.Column(column, sa.String(22), nullable=True))
    op.create_unique_constraint("uq_users_email_inbound_token", table, [column])

    # Backfill existing users with generated tokens (PostgreSQL):
    # the first 22 characters of a dash-stripped random UUID.
    backfill_sql = (
        "UPDATE users SET email_inbound_token = "
        "substring(replace(gen_random_uuid()::text, '-', ''), 1, 22) "
        "WHERE email_inbound_token IS NULL"
    )
    op.execute(backfill_sql)

    # Every row now has a value, so the column can become non-nullable.
    op.alter_column(table, column, nullable=False)
def downgrade() -> None:
    """Drop the unique constraint, then the ``email_inbound_token`` column."""
    table = "users"
    op.drop_constraint("uq_users_email_inbound_token", table, type_="unique")
    op.drop_column(table, "email_inbound_token")
+2 -5
View File
@@ -8,7 +8,8 @@ version = "0.1.0"
description = "CartSnitch receipt/purchase history ingestion service" description = "CartSnitch receipt/purchase history ingestion service"
requires-python = ">=3.12" requires-python = ">=3.12"
dependencies = [ dependencies = [
"cartsnitch-common>=0.1.0", "alembic>=1.13,<2.0",
"pydantic[email]>=2.0,<3.0",
"playwright>=1.49,<2.0", "playwright>=1.49,<2.0",
"playwright-stealth>=1.0,<2.0", "playwright-stealth>=1.0,<2.0",
"cryptography>=46.0,<47.0", "cryptography>=46.0,<47.0",
@@ -50,10 +51,6 @@ strict = false
warn_return_any = true warn_return_any = true
warn_unused_ignores = true warn_unused_ignores = true
[[tool.mypy.overrides]]
module = "cartsnitch_common.*"
ignore_missing_imports = true
[tool.pytest.ini_options] [tool.pytest.ini_options]
asyncio_mode = "auto" asyncio_mode = "auto"
testpaths = ["tests"] testpaths = ["tests"]
+2 -2
View File
@@ -7,8 +7,8 @@ from datetime import UTC, datetime
from decimal import Decimal from decimal import Decimal
import redis.asyncio as aioredis import redis.asyncio as aioredis
from cartsnitch_common.database import get_async_session_factory from receiptwitness.shared.database import get_async_session_factory
from cartsnitch_common.models.user import User from receiptwitness.shared.models import User
from sqlalchemy import select from sqlalchemy import select
from receiptwitness.config import settings from receiptwitness.config import settings
+3 -3
View File
@@ -7,9 +7,9 @@ and batch matching for purchase ingestion.
import uuid import uuid
from dataclasses import dataclass from dataclasses import dataclass
from cartsnitch_common.constants import MatchConfidence from receiptwitness.shared.constants import MatchConfidence
from cartsnitch_common.models.product import NormalizedProduct from receiptwitness.shared.models import NormalizedProduct
from cartsnitch_common.schemas.purchase import PurchaseItemCreate from receiptwitness.shared.schemas import PurchaseItemCreate
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
from receiptwitness.pipeline.normalization import ( from receiptwitness.pipeline.normalization import (
+1 -1
View File
@@ -10,7 +10,7 @@ import re
from dataclasses import dataclass from dataclasses import dataclass
from enum import StrEnum from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct from receiptwitness.shared.models import NormalizedProduct
from sqlalchemy import cast, func, select, String from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session from sqlalchemy.orm import Session
+1 -1
View File
@@ -7,7 +7,7 @@ import re
from datetime import date from datetime import date
from decimal import Decimal, InvalidOperation from decimal import Decimal, InvalidOperation
from cartsnitch_common.schemas.purchase import PurchaseCreate, PurchaseItemCreate from receiptwitness.shared.schemas import PurchaseCreate, PurchaseItemCreate
def _clean_product_name(raw: str) -> str: def _clean_product_name(raw: str) -> str:
+5
View File
@@ -0,0 +1,5 @@
"""ReceiptWitness shared package — inlined from cartsnitch-common.
This package contains the subset of cartsnitch-common needed by ReceiptWitness,
extracted and made self-contained so receiptwitness has no cross-repo dependency.
"""
+18
View File
@@ -0,0 +1,18 @@
"""Shared configuration for CartSnitch services via pydantic-settings."""
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Settings for CartSnitch services, populated from the environment.

    Variables are read with the ``CARTSNITCH_`` prefix, and a ``.env`` file is
    consulted as well. The values below are the defaults used when nothing is
    provided.
    """

    model_config = SettingsConfigDict(env_file=".env", env_prefix="CARTSNITCH_")

    # Connection URLs: async driver (asyncpg) and sync driver (psycopg2).
    database_url: str = "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch"
    database_url_sync: str = "postgresql+psycopg2://cartsnitch:cartsnitch@localhost:5432/cartsnitch"

    redis_url: str = "redis://localhost:6379/0"

    # `debug` is passed as `echo` to the SQLAlchemy engines built in shared/database.py.
    debug: bool = False
    log_level: str = "INFO"
settings = Settings()
+85
View File
@@ -0,0 +1,85 @@
"""Constants and enums shared across CartSnitch services."""
from enum import StrEnum
class StoreSlug(StrEnum):
"""Supported retailer slugs."""
MEIJER = "meijer"
KROGER = "kroger"
TARGET = "target"
class AccountStatus(StrEnum):
"""User store account link status."""
ACTIVE = "active"
EXPIRED = "expired"
ERROR = "error"
class DiscountType(StrEnum):
"""Coupon discount type."""
PERCENT = "percent"
FIXED = "fixed"
BOGO = "bogo"
BUY_X_GET_Y = "buy_x_get_y"
class PriceSource(StrEnum):
"""Source of a price observation."""
RECEIPT = "receipt"
CATALOG = "catalog"
WEEKLY_AD = "weekly_ad"
class EventType(StrEnum):
"""Redis pub/sub event types."""
RECEIPTS_INGESTED = "cartsnitch.receipts.ingested"
PRICES_UPDATED = "cartsnitch.prices.updated"
PRODUCTS_NORMALIZED = "cartsnitch.products.normalized"
COUPONS_UPDATED = "cartsnitch.coupons.updated"
ALERT_PRICE_INCREASE = "cartsnitch.alerts.price_increase"
ALERT_SHRINKFLATION = "cartsnitch.alerts.shrinkflation"
class ProductCategory(StrEnum):
"""Top-level product categories."""
PRODUCE = "produce"
DAIRY = "dairy"
MEAT = "meat"
BAKERY = "bakery"
FROZEN = "frozen"
PANTRY = "pantry"
BEVERAGES = "beverages"
SNACKS = "snacks"
HOUSEHOLD = "household"
PERSONAL_CARE = "personal_care"
OTHER = "other"
class MatchConfidence(StrEnum):
"""Confidence level for product matching."""
HIGH = "high"
MEDIUM = "medium"
LOW = "low"
class SizeUnit(StrEnum):
"""Standardized product size units."""
OZ = "oz"
FL_OZ = "fl_oz"
LB = "lb"
G = "g"
KG = "kg"
ML = "ml"
L = "l"
CT = "ct"
PK = "pk"
+45
View File
@@ -0,0 +1,45 @@
"""Database engine and session factories for sync and async usage."""
from collections.abc import AsyncGenerator, Generator
from sqlalchemy import create_engine
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import Session, sessionmaker
from receiptwitness.shared.config import settings
def get_async_engine(url: str | None = None):
    """Build a new async SQLAlchemy engine.

    Args:
        url: Database URL; when empty or ``None``, ``settings.database_url`` is used.

    Returns:
        The engine returned by ``create_async_engine``, with ``echo`` set from
        ``settings.debug``.
    """
    dsn = url if url else settings.database_url
    return create_async_engine(dsn, echo=settings.debug)
def get_sync_engine(url: str | None = None):
    """Build a new sync SQLAlchemy engine.

    Args:
        url: Database URL; when empty or ``None``, ``settings.database_url_sync`` is used.

    Returns:
        The engine returned by ``create_engine``, with ``echo`` set from
        ``settings.debug``.
    """
    dsn = url if url else settings.database_url_sync
    return create_engine(dsn, echo=settings.debug)
def get_async_session_factory(url: str | None = None) -> async_sessionmaker[AsyncSession]:
    """Build an async session factory bound to a freshly created engine.

    Args:
        url: Optional database URL, forwarded to ``get_async_engine``.

    Returns:
        An ``async_sessionmaker`` producing ``AsyncSession`` objects with
        ``expire_on_commit=False``.
    """
    return async_sessionmaker(
        get_async_engine(url),
        class_=AsyncSession,
        expire_on_commit=False,
    )
def get_sync_session_factory(url: str | None = None) -> sessionmaker[Session]:
    """Build a sync session factory bound to a freshly created engine.

    Args:
        url: Optional database URL, forwarded to ``get_sync_engine``.

    Returns:
        A ``sessionmaker`` configured with ``expire_on_commit=False``.
    """
    return sessionmaker(get_sync_engine(url), expire_on_commit=False)
async def get_async_session(url: str | None = None) -> AsyncGenerator[AsyncSession, None]:
    """Yield one async session; intended for dependency injection.

    The session is opened through ``async with``, so it is closed once the
    consumer is done with the generator.

    Args:
        url: Optional database URL, forwarded to ``get_async_session_factory``.
    """
    # NOTE(review): each call builds a new factory, and therefore a new engine.
    session_factory = get_async_session_factory(url)
    async with session_factory() as db_session:
        yield db_session
def get_sync_session(url: str | None = None) -> Generator[Session, None, None]:
    """Yield one sync session; intended for dependency injection.

    The session is opened through ``with``, so it is closed once the consumer
    is done with the generator.

    Args:
        url: Optional database URL, forwarded to ``get_sync_session_factory``.
    """
    # NOTE(review): each call builds a new factory, and therefore a new engine.
    session_factory = get_sync_session_factory(url)
    with session_factory() as db_session:
        yield db_session
@@ -0,0 +1,23 @@
"""ReceiptWitness ORM models — inlined from cartsnitch-common."""
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
from receiptwitness.shared.models.product import NormalizedProduct
from receiptwitness.shared.models.user import User, UserStoreAccount
# Stub models — needed for relationship resolution but not directly used by receiptwitness.
# Full definitions live in cartsnitch/common.
from receiptwitness.shared.models.stub_store import Store, StoreLocation
from receiptwitness.shared.models.stub_purchase import Purchase, PurchaseItem
__all__ = [
"Base",
"TimestampMixin",
"UUIDPrimaryKeyMixin",
"NormalizedProduct",
"Purchase",
"PurchaseItem",
"Store",
"StoreLocation",
"User",
"UserStoreAccount",
]
+30
View File
@@ -0,0 +1,30 @@
"""Base model and mixins for all ReceiptWitness ORM models."""
import uuid
from datetime import datetime
from sqlalchemy import DateTime, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
class Base(DeclarativeBase):
    """Declarative root class that every ReceiptWitness ORM model derives from."""
class TimestampMixin:
    """Mixin that adds ``created_at`` and ``updated_at`` columns.

    Both are timezone-aware, non-nullable and default to ``now()`` on the
    database side; ``updated_at`` additionally has ``onupdate=now()``.
    """

    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
        onupdate=func.now(),
    )
class UUIDPrimaryKeyMixin:
    """Mixin that adds a UUID primary key column named ``id``.

    The value defaults to ``uuid.uuid4`` on the Python side and to
    ``gen_random_uuid()`` on the database side.
    """

    id: Mapped[uuid.UUID] = mapped_column(
        primary_key=True,
        server_default=func.gen_random_uuid(),
        default=uuid.uuid4,
    )
@@ -0,0 +1,26 @@
"""NormalizedProduct model — the canonical product identity."""
from typing import TYPE_CHECKING
from sqlalchemy import JSON, String
from sqlalchemy.orm import Mapped, mapped_column
from receiptwitness.shared.constants import ProductCategory, SizeUnit
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
if TYPE_CHECKING:
pass
class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Canonical product identity used to match products across retailers."""

    __tablename__ = "normalized_products"

    canonical_name: Mapped[str] = mapped_column(String(300), nullable=False)

    # Classification — stored as plain strings; all optional.
    category: Mapped[ProductCategory | None] = mapped_column(String(50), nullable=True)
    subcategory: Mapped[str | None] = mapped_column(String(100), nullable=True)
    brand: Mapped[str | None] = mapped_column(String(200), nullable=True)

    # Package size and its unit.
    size: Mapped[str | None] = mapped_column(String(50), nullable=True)
    size_unit: Mapped[SizeUnit | None] = mapped_column(String(10), nullable=True)

    # JSON list of UPC strings; a new empty list is the Python-side default.
    upc_variants: Mapped[list[str] | None] = mapped_column(JSON, nullable=True, default=list)
@@ -0,0 +1,64 @@
"""Stub Purchase and PurchaseItem models.
These are minimal stubs of the full cartsnitch-common Purchase/PurchaseItem models.
They exist solely to satisfy SQLAlchemy relationship resolution for User and
UserStoreAccount. The canonical definitions live in cartsnitch/common.
"""
import uuid
from datetime import date, datetime
from decimal import Decimal
from sqlalchemy import JSON, Date, DateTime, ForeignKey, Index, Numeric, String, UniqueConstraint, func
from sqlalchemy.orm import Mapped, mapped_column
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
class Purchase(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: a shopping trip/receipt. Full definition in cartsnitch/common."""

    __tablename__ = "purchases"
    __table_args__ = (
        Index("ix_purchases_user_store", "user_id", "store_id"),
        # One row per (user, store, receipt).
        UniqueConstraint("user_id", "store_id", "receipt_id", name="uq_purchase_receipt"),
    )

    # Ownership / location
    user_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False)
    store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
    store_location_id: Mapped[uuid.UUID | None] = mapped_column(
        ForeignKey("store_locations.id"), nullable=True
    )

    # Receipt identity and date
    receipt_id: Mapped[str] = mapped_column(String(200), nullable=False)
    purchase_date: Mapped[date] = mapped_column(Date, nullable=False)

    # Monetary amounts
    total: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False)
    subtotal: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
    tax: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
    savings_total: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)

    # Provenance
    source_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
    raw_data: Mapped[dict | None] = mapped_column(JSON, nullable=True)
    ingested_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        server_default=func.now(),
    )
class PurchaseItem(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: a line item on a receipt. Full definition in cartsnitch/common."""

    __tablename__ = "purchase_items"

    purchase_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("purchases.id"), nullable=False)

    # Product as printed on the receipt
    product_name_raw: Mapped[str] = mapped_column(String(300), nullable=False)
    upc: Mapped[str | None] = mapped_column(String(20), nullable=True)

    # Quantity and required prices
    quantity: Mapped[Decimal] = mapped_column(Numeric(10, 3), default=1, nullable=False)
    unit_price: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False)
    extended_price: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False)

    # Optional price and discount details
    regular_price: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
    sale_price: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
    coupon_discount: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)
    loyalty_discount: Mapped[Decimal | None] = mapped_column(Numeric(10, 2), nullable=True)

    category_raw: Mapped[str | None] = mapped_column(String(100), nullable=True)
    normalized_product_id: Mapped[uuid.UUID | None] = mapped_column(
        ForeignKey("normalized_products.id"), nullable=True
    )
@@ -0,0 +1,39 @@
"""Stub Store and StoreLocation models.
These are minimal stubs of the full cartsnitch-common Store/StoreLocation models.
They exist solely to satisfy SQLAlchemy relationship resolution for User and
UserStoreAccount. The canonical definitions live in cartsnitch/common.
"""
import uuid
from sqlalchemy import Float, ForeignKey, String
from sqlalchemy.orm import Mapped, mapped_column, relationship
from receiptwitness.shared.constants import StoreSlug
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
class Store(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: canonical retailer. Full definition in cartsnitch/common."""

    __tablename__ = "stores"

    name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Unique per retailer; persisted as a plain string column.
    slug: Mapped[StoreSlug] = mapped_column(String(20), unique=True, nullable=False)
    logo_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
    website_url: Mapped[str | None] = mapped_column(String(500), nullable=True)
class StoreLocation(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: physical store location. Full definition in cartsnitch/common."""

    __tablename__ = "store_locations"

    store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)

    # Postal address
    address: Mapped[str] = mapped_column(String(300), nullable=False)
    city: Mapped[str] = mapped_column(String(100), nullable=False)
    state: Mapped[str] = mapped_column(String(2), nullable=False)
    zip: Mapped[str] = mapped_column(String(10), nullable=False)

    # Optional coordinates
    lat: Mapped[float | None] = mapped_column(Float, nullable=True)
    lng: Mapped[float | None] = mapped_column(Float, nullable=True)
+63
View File
@@ -0,0 +1,63 @@
"""User and UserStoreAccount models."""
import secrets
import uuid
from datetime import datetime
from typing import TYPE_CHECKING
from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint, text
from sqlalchemy.orm import Mapped, mapped_column, relationship
from receiptwitness.shared.constants import AccountStatus
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
if TYPE_CHECKING:
from receiptwitness.shared.models.stub_purchase import Purchase
from receiptwitness.shared.models.stub_store import Store
class User(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Application user.

    Every user carries a unique 22-character ``email_inbound_token`` that is
    generated automatically, either in Python or by the database.
    """

    __tablename__ = "users"

    email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
    email_inbound_token: Mapped[str] = mapped_column(
        String(22),
        nullable=False,
        unique=True,
        # 16 random bytes encode to 22 URL-safe base64 characters.
        default=lambda: secrets.token_urlsafe(16),
        # Database-side equivalent: base64 of 16 random bytes with the padding
        # trimmed and '+' / '/' mapped to '-' / '_'.
        server_default=text(
            "replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_')"
        ),
    )
    hashed_password: Mapped[str | None] = mapped_column(String(255), nullable=True)
    display_name: Mapped[str | None] = mapped_column(String(100))
    email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="false")
    image: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Relationships
    store_accounts: Mapped[list["UserStoreAccount"]] = relationship(back_populates="user")
    # Deliberately one-directional: the stub Purchase model in this package
    # defines no ``user`` relationship, so ``back_populates="user"`` would make
    # SQLAlchemy raise InvalidRequestError when the mappers are configured.
    purchases: Mapped[list["Purchase"]] = relationship()
class UserStoreAccount(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Link between a user and their retailer account credentials.

    ORM model for the ``user_store_accounts`` table. The
    ``uq_user_store_account`` unique constraint allows at most one row per
    (``user_id``, ``store_id``) pair.
    """

    __tablename__ = "user_store_accounts"
    # One account row per user per store.
    __table_args__ = (UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),)

    # Required foreign keys to ``users.id`` and ``stores.id``.
    user_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False)
    store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
    # WARNING: Contains retailer session cookies/tokens. Encryption-at-rest
    # required before production deployment (e.g., pgcrypto or app-level encryption).
    # Stored as a JSON column; nullable.
    session_data: Mapped[dict | None] = mapped_column(JSON)
    # Timezone-aware timestamps; both nullable.
    session_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    # Persisted as a plain String(20) column rather than a database enum type.
    # The ACTIVE default is applied Python-side only (no server_default).
    status: Mapped[AccountStatus] = mapped_column(
        String(20), nullable=False, default=AccountStatus.ACTIVE
    )

    # Relationships
    # ``user`` pairs with ``User.store_accounts``; ``store`` expects a
    # ``user_accounts`` attribute on ``Store``. ``Store`` is imported only
    # under TYPE_CHECKING, so it is referenced by string name here.
    user: Mapped["User"] = relationship(back_populates="store_accounts")
    store: Mapped["Store"] = relationship(back_populates="user_accounts")
@@ -0,0 +1,5 @@
"""ReceiptWitness Pydantic schemas — inlined from cartsnitch-common."""
from receiptwitness.shared.schemas.purchase import PurchaseCreate, PurchaseItemCreate
__all__ = ["PurchaseCreate", "PurchaseItemCreate"]
@@ -0,0 +1,73 @@
"""Purchase and PurchaseItem Pydantic schemas."""
import uuid
from datetime import date, datetime
from decimal import Decimal
from pydantic import BaseModel
# Pydantic schema for creating one purchase item.
#
# Required: product_name_raw, unit_price, extended_price.
# quantity defaults to Decimal("1"); every other field is optional and
# defaults to None.
#
# Documented with comments rather than a class docstring because Pydantic
# exposes a model's docstring as the JSON-schema ``description``.
class PurchaseItemCreate(BaseModel):
    # NOTE(review): "raw" presumably means the product name exactly as it
    # appears on the receipt -- confirm with the parsers that fill it in.
    product_name_raw: str
    # Kept as a string, not a number.
    upc: str | None = None
    quantity: Decimal = Decimal("1")
    # All monetary amounts are Decimal.
    unit_price: Decimal
    # NOTE(review): presumably quantity * unit_price; nothing here enforces it.
    extended_price: Decimal
    regular_price: Decimal | None = None
    sale_price: Decimal | None = None
    coupon_discount: Decimal | None = None
    loyalty_discount: Decimal | None = None
    category_raw: str | None = None
    # NOTE(review): presumably the id of a NormalizedProduct row, left as None
    # until the item has been matched -- confirm against the pipeline.
    normalized_product_id: uuid.UUID | None = None
# Pydantic schema for reading a stored purchase item.
#
# Has the same item fields as PurchaseItemCreate plus ``id`` and
# ``purchase_id``. No field has a default: nullable fields must still be
# supplied, explicitly as None if absent.
class PurchaseItemRead(BaseModel):
    # from_attributes lets the model be validated from an object's attributes
    # (e.g. an ORM instance) instead of only from a dict.
    model_config = {"from_attributes": True}
    id: uuid.UUID
    purchase_id: uuid.UUID
    product_name_raw: str
    upc: str | None
    quantity: Decimal
    unit_price: Decimal
    extended_price: Decimal
    regular_price: Decimal | None
    sale_price: Decimal | None
    coupon_discount: Decimal | None
    loyalty_discount: Decimal | None
    category_raw: str | None
    normalized_product_id: uuid.UUID | None
# Pydantic schema for creating a purchase together with its line items.
#
# Required: user_id, store_id, receipt_id, purchase_date, total.
# All other scalar fields are optional and default to None; ``items``
# defaults to an empty list.
class PurchaseCreate(BaseModel):
    user_id: uuid.UUID
    store_id: uuid.UUID
    store_location_id: uuid.UUID | None = None
    # String identifier of the receipt.
    # NOTE(review): presumably the retailer's own receipt id -- confirm.
    receipt_id: str
    # Calendar date only (``date``, not ``datetime``).
    purchase_date: date
    total: Decimal
    subtotal: Decimal | None = None
    tax: Decimal | None = None
    savings_total: Decimal | None = None
    source_url: str | None = None
    # Untyped dict.
    # NOTE(review): presumably the unparsed scraper payload -- confirm.
    raw_data: dict | None = None
    # The literal [] default is safe here: Pydantic copies field defaults for
    # each instance, so instances do not share one list.
    items: list[PurchaseItemCreate] = []
# Pydantic schema for reading a stored purchase.
#
# Compared with PurchaseCreate it adds ``id``, ``ingested_at``,
# ``created_at`` and ``updated_at``, and it has no ``raw_data`` or ``items``
# field. No field has a default: nullable fields must still be supplied.
class PurchaseRead(BaseModel):
    # from_attributes lets the model be validated from an object's attributes
    # (e.g. an ORM instance) instead of only from a dict.
    model_config = {"from_attributes": True}
    id: uuid.UUID
    user_id: uuid.UUID
    store_id: uuid.UUID
    store_location_id: uuid.UUID | None
    receipt_id: str
    purchase_date: date
    total: Decimal
    subtotal: Decimal | None
    tax: Decimal | None
    savings_total: Decimal | None
    source_url: str | None
    ingested_at: datetime
    created_at: datetime
    updated_at: datetime
+2 -2
View File
@@ -3,8 +3,8 @@
import asyncio import asyncio
import logging import logging
from cartsnitch_common.database import get_async_session_factory from receiptwitness.shared.database import get_async_session_factory
from cartsnitch_common.models.user import User from receiptwitness.shared.models import User
from sqlalchemy import select from sqlalchemy import select
from receiptwitness.config import settings from receiptwitness.config import settings
+1 -1
View File
@@ -1,7 +1,7 @@
"""Shared test fixtures for pipeline tests.""" """Shared test fixtures for pipeline tests."""
import pytest import pytest
from cartsnitch_common.models.base import Base from receiptwitness.shared.models import Base
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker from sqlalchemy.orm import sessionmaker
+3 -3
View File
@@ -4,9 +4,9 @@ import uuid
from datetime import UTC, datetime from datetime import UTC, datetime
from decimal import Decimal from decimal import Decimal
from cartsnitch_common.constants import MatchConfidence from receiptwitness.shared.constants import MatchConfidence
from cartsnitch_common.models.product import NormalizedProduct from receiptwitness.shared.models import NormalizedProduct
from cartsnitch_common.schemas.purchase import PurchaseItemCreate from receiptwitness.shared.schemas import PurchaseItemCreate
from receiptwitness.pipeline.matching import ( from receiptwitness.pipeline.matching import (
ProductMatcher, ProductMatcher,
+1 -1
View File
@@ -3,7 +3,7 @@
import uuid import uuid
from datetime import UTC, datetime from datetime import UTC, datetime
from cartsnitch_common.models.product import NormalizedProduct from receiptwitness.shared.models import NormalizedProduct
from receiptwitness.pipeline.normalization import ( from receiptwitness.pipeline.normalization import (
MatchMethod, MatchMethod,