feat: migrate receiptwitness to standalone repo with inlined common

Extract receiptwitness/ from the monorepo into cartsnitch/receiptwitness.
Inline the consumed modules from cartsnitch-common so there is no
cross-repo dependency.

- Add src/receiptwitness/shared/ with inlined models, schemas, constants, database
- Update all imports from cartsnitch_common to receiptwitness.shared
- Remove cartsnitch-common dependency from pyproject.toml
- Copy and update Alembic config (alembic.ini, alembic/)
- Update Dockerfile for standalone build context, add migration CMD
- Add CI workflow with lint, test, build, grype scan, deploy-dev, deploy-uat
- Add .grype.yaml

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Barcode Betty
2026-04-19 12:18:11 +00:00
parent bf7cabc9d8
commit f47da487da
30 changed files with 898 additions and 33 deletions
+212
View File
@@ -0,0 +1,212 @@
# CI pipeline for ReceiptWitness: lint and test on pushes and pull requests;
# the image build, scan and deploy jobs below only run on pushes.
name: CI

on:
  push:
    branches:
      - main
      - dev
      - uat
  pull_request:
    branches:
      - main
      - dev
      - uat

# A newer run on the same ref cancels the run that is still in progress.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

# contents: write is used when build-and-push pushes the release git tag;
# packages: write is used to push the image to GHCR.
permissions:
  contents: write
  packages: write

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: cartsnitch/receiptwitness

jobs:
lint:
  # Static checks only: ruff is the single tool installed for this job.
  runs-on: runners-cartsnitch
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string rather than a float
        python-version: '3.12'

    - name: Install dependencies
      run: pip install ruff

    - name: Lint
      run: ruff check src/ tests/
test:
  # Installs the package in editable mode with its dev extras, then runs pytest.
  runs-on: runners-cartsnitch
  steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Quoted so the version stays a string rather than a float
        python-version: '3.12'

    - name: Install dependencies
      run: pip install -e ".[dev]"

    - name: Run tests
      run: pytest tests/ -v
build-and-push:
  # Builds the image and pushes it to GHCR on every push event. On main it
  # additionally computes a CalVer version, tags the image with it and with
  # "latest", and pushes a matching git tag.
  runs-on: runners-cartsnitch
  if: github.event_name == 'push'
  needs: [lint, test]
  outputs:
    # calver_tag is only populated on main, where the CalVer step runs
    calver_tag: ${{ steps.calver.outputs.version }}
    sha_tag: sha-${{ github.sha }}
  steps:
    - uses: actions/checkout@v4
      with:
        # Full history so the existing git tags can be listed below
        fetch-depth: 0

    - name: Generate CalVer tag
      id: calver
      if: github.event_name == 'push' && github.ref == 'refs/heads/main'
      run: |
        DATE_TAG=$(date -u +%Y.%m.%d)
        EXISTING=$(git tag -l "v${DATE_TAG}*" | sort -V | tail -1)
        if [ -z "$EXISTING" ]; then
          # First build of the day: plain date
          VERSION="$DATE_TAG"
        elif [ "$EXISTING" = "v${DATE_TAG}" ]; then
          # Second build of the day: start the numeric suffix at 2
          VERSION="${DATE_TAG}.2"
        else
          # Later builds: bump the highest existing suffix
          BUILD_NUM=$(echo "$EXISTING" | sed "s/v${DATE_TAG}\.//")
          VERSION="${DATE_TAG}.$((BUILD_NUM + 1))"
        fi
        echo "version=$VERSION" >> "$GITHUB_OUTPUT"
        echo "CalVer tag: $VERSION"

    - name: Log in to GHCR
      uses: docker/login-action@v3
      with:
        registry: ${{ env.REGISTRY }}
        username: ${{ github.actor }}
        password: ${{ secrets.GITHUB_TOKEN }}

    - name: Extract metadata
      id: meta
      uses: docker/metadata-action@v5
      with:
        images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
        # The sha- tag is always produced; the CalVer and latest tags are main-only
        tags: |
          type=sha,prefix=sha-,format=long
          type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
          type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}

    - name: Build and push
      uses: docker/build-push-action@v6
      with:
        context: .
        push: ${{ github.event_name == 'push' }}
        tags: ${{ steps.meta.outputs.tags }}
        labels: ${{ steps.meta.outputs.labels }}
        # A per-run value for the Dockerfile's APT_CACHE_BUST build argument
        build-args: APT_CACHE_BUST=${{ github.run_id }}

    - name: Create git tag
      if: github.event_name == 'push' && github.ref == 'refs/heads/main'
      run: |
        git tag "v${{ steps.calver.outputs.version }}"
        git push origin "v${{ steps.calver.outputs.version }}"
grype:
  # Produces an SPDX SBOM for the image pushed by build-and-push, then scans
  # that SBOM with Grype. The job fails on findings of severity "high" or above.
  runs-on: runners-cartsnitch
  needs: [build-and-push]
  if: github.event_name == 'push'
  steps:
    # The checkout puts .grype.yaml into the working directory, which is
    # where Grype looks for its configuration (including the ignore list).
    - uses: actions/checkout@v4

    # sbom-action runs Syft; it generates the SBOM and does not scan.
    - name: Generate SBOM
      uses: anchore/sbom-action@v0
      with:
        image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }}
        format: spdx-json
        output-file: sbom.spdx.json
        # Credentials for the image pull; needed if the GHCR package is private.
        registry-username: ${{ github.actor }}
        registry-password: ${{ secrets.GITHUB_TOKEN }}

    - name: Upload SBOM
      uses: actions/upload-artifact@v4
      with:
        name: sbom
        path: sbom.spdx.json

    # anchore/scan-action is Anchore's Grype action. Its inputs are
    # fail-build / severity-cutoff; it has no fail-on or ignore-file input.
    - name: Run Grype
      uses: anchore/scan-action@v6
      with:
        sbom: sbom.spdx.json
        fail-build: true
        severity-cutoff: high
deploy-dev:
  # Points the dev overlay in cartsnitch/infra at the image built from this
  # commit and pushes that change to the infra repo's main branch.
  #
  # NOTE(review): always() lets this job run even when the Grype scan failed;
  # that is kept from the original and looks intentional - confirm. The image
  # build, however, must have succeeded, otherwise the overlay would be
  # pointed at a tag that was never pushed.
  runs-on: runners-cartsnitch
  needs: [build-and-push, grype]
  if: >-
    always() && !cancelled()
    && needs.build-and-push.result == 'success'
    && github.event_name == 'push'
    && github.ref == 'refs/heads/dev'
  steps:
    - name: Generate GitHub App token
      id: app-token
      uses: actions/create-github-app-token@v1
      with:
        app-id: ${{ secrets.CARTSNITCH_APP_ID }}
        private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }}
        owner: ${{ github.repository_owner }}
        repositories: infra

    - name: Checkout infra repo
      uses: actions/checkout@v4
      with:
        repository: cartsnitch/infra
        token: ${{ steps.app-token.outputs.token }}
        ref: main
        path: infra

    - name: Install kubectl
      uses: azure/setup-kubectl@v4

    - name: Install kustomize
      uses: imranismail/setup-kustomize@v2

    - name: Update receiptwitness image tag
      run: |
        cd infra/apps/overlays/dev
        kustomize edit set image ghcr.io/cartsnitch/receiptwitness:sha-${{ github.sha }}

    - name: Commit and push to infra
      run: |
        cd infra
        git config user.name "cartsnitch-ci[bot]"
        git config user.email "cartsnitch-ci[bot]@users.noreply.github.com"
        git add apps/overlays/dev/kustomization.yaml
        # A re-run for the same commit stages nothing; "git commit" would
        # then exit non-zero and fail the job, so stop cleanly instead.
        if git diff --cached --quiet; then
          echo "Image tag already up to date; nothing to commit."
          exit 0
        fi
        git commit -m "ci(dev): update receiptwitness to sha-${{ github.sha }}"
        git pull --rebase origin main
        git push origin main
deploy-uat:
  # Points the uat overlay in cartsnitch/infra at the image built from this
  # commit and pushes that change to the infra repo's main branch.
  #
  # The image is referenced by its sha- tag. The CalVer tag cannot be used
  # here: build-and-push only generates it on main, and this job only runs on
  # the uat branch, so calver_tag is always empty for it.
  #
  # NOTE(review): always() lets this job run even when the Grype scan failed;
  # that is kept from the original and looks intentional - confirm. The image
  # build, however, must have succeeded.
  runs-on: runners-cartsnitch
  needs: [build-and-push, grype]
  if: >-
    always() && !cancelled()
    && needs.build-and-push.result == 'success'
    && github.event_name == 'push'
    && github.ref == 'refs/heads/uat'
  steps:
    - name: Generate GitHub App token
      id: app-token
      uses: actions/create-github-app-token@v1
      with:
        app-id: ${{ secrets.CARTSNITCH_APP_ID }}
        private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }}
        owner: ${{ github.repository_owner }}
        repositories: infra

    - name: Checkout infra repo
      uses: actions/checkout@v4
      with:
        repository: cartsnitch/infra
        token: ${{ steps.app-token.outputs.token }}
        ref: main
        path: infra

    - name: Install kubectl
      uses: azure/setup-kubectl@v4

    - name: Install kustomize
      uses: imranismail/setup-kustomize@v2

    - name: Update receiptwitness image tag
      run: |
        cd infra/apps/overlays/uat
        kustomize edit set image ghcr.io/cartsnitch/receiptwitness:sha-${{ github.sha }}

    - name: Commit and push to infra
      run: |
        cd infra
        git config user.name "cartsnitch-ci[bot]"
        git config user.email "cartsnitch-ci[bot]@users.noreply.github.com"
        git add apps/overlays/uat/kustomization.yaml
        # A re-run for the same commit stages nothing; "git commit" would
        # then exit non-zero and fail the job, so stop cleanly instead.
        if git diff --cached --quiet; then
          echo "Image tag already up to date; nothing to commit."
          exit 0
        fi
        git commit -m "ci(uat): update receiptwitness to sha-${{ github.sha }}"
        git pull --rebase origin main
        git push origin main
+4
View File
@@ -0,0 +1,4 @@
ignore:
# Python 3.12 CVEs — only fixed in 3.13+; moving off the 3.12 release line is not considered safe yet
- vulnerability: CVE-2025-13836
- vulnerability: CVE-2026-4519
+3 -3
View File
@@ -11,7 +11,7 @@ CartSnitch is a self-hosted grocery price intelligence platform built as a polyr
| Repo | Service | Purpose |
|------|---------|---------|
| `cartsnitch/common` | — | Shared models, schemas, utilities |
| `cartsnitch/common` | — | Shared models, schemas, utilities (extracted into individual service repos) |
| `cartsnitch/receiptwitness` | ReceiptWitness | Purchase data ingestion via retailer scrapers (this repo) |
| `cartsnitch/api` | API Gateway | Frontend-facing REST API |
| `cartsnitch/cartsnitch` | Frontend | React PWA (mobile-first) |
@@ -23,7 +23,7 @@ CartSnitch is a self-hosted grocery price intelligence platform built as a polyr
### Architecture Decisions
- **Polyrepo:** Each service has its own repo, Dockerfile, CI/CD pipeline.
- **Shared DB:** One PostgreSQL cluster. This service writes to `purchases`, `purchase_items`, `price_history` tables. Models come from `cartsnitch-common`.
- **Shared DB:** One PostgreSQL cluster. This service writes to `purchases`, `purchase_items`, `price_history` tables. Models are inlined under `src/receiptwitness/shared/` (extracted from `cartsnitch-common` during the CAR-724 migration).
- **Inter-service comms:** REST (synchronous) + Redis pub/sub (async events).
- **Target scale:** 500–1,000 users. Each user has their own authenticated sessions to up to 3 retailers.
@@ -60,7 +60,7 @@ ReceiptWitness authenticates with grocery retailer web portals using per-user se
- Python 3.12+
- Playwright (Python async API) for headless browser automation
- FastAPI (lightweight internal API for triggering scrapes, health checks, status)
- SQLAlchemy 2.0 (via `cartsnitch-common`)
- SQLAlchemy 2.0 (models inlined under `src/receiptwitness/shared/`)
- Redis (pub/sub event publishing)
- APScheduler or Celery (for scheduled scraping jobs)
- cryptography / Fernet (encrypting stored session data)
+9 -11
View File
@@ -3,22 +3,18 @@ FROM python:3.12-slim AS build
WORKDIR /app
# build-essential and libpq-dev are needed to compile any C-extension wheels
# (e.g. psycopg2 fallback). No git needed — common/ is copied from the repo root.
ARG APT_CACHE_BUST=1
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
libpq-dev \
build-essential \
&& rm -rf /var/lib/apt/lists/*
# Build context is the repo root. These paths are relative to the root.
COPY receiptwitness/pyproject.toml ./
COPY receiptwitness/src/ ./src/
COPY common/ ./common/
# Build context is the receiptwitness repo root.
COPY pyproject.toml ./
COPY src/ ./src/
# Install from the local common/ (cartsnitch-common>=0.1.0 in pyproject.toml
# will be satisfied by the local package) then install receiptwitness itself.
RUN pip install --no-cache-dir --prefix=/install ./common/ .
# Install receiptwitness (shared modules are inlined under src/receiptwitness/shared/).
RUN pip install --no-cache-dir --prefix=/install .
# Stage 2: Production image with Playwright + Chromium
FROM python:3.12-slim AS prod
@@ -50,7 +46,9 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
RUN adduser --system --group --uid 1000 app
COPY --from=build /install /usr/local
COPY receiptwitness/src/ ./src/
COPY src/ ./src/
COPY alembic.ini ./
COPY alembic/ ./alembic/
# Install Playwright Chromium browser (runs as root; /opt/playwright is world-readable)
RUN PLAYWRIGHT_BROWSERS_PATH=/opt/playwright playwright install chromium
@@ -63,4 +61,4 @@ EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=3s \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
CMD ["uvicorn", "receiptwitness.main:app", "--host", "0.0.0.0", "--port", "8000"]
CMD ["sh", "-c", "python -m alembic upgrade head && uvicorn receiptwitness.main:app --host 0.0.0.0 --port 8000"]
+29
View File
@@ -0,0 +1,29 @@
# ReceiptWitness
Purchase data ingestion service for CartSnitch. Authenticates with grocery retailer web portals (Meijer, Kroger, Target) via Playwright, scrapes purchase history, and writes structured records to the shared PostgreSQL database.
## Quick Start
```bash
# Install dependencies
pip install -e ".[dev]"
# Run tests
pytest tests/ -v
# Local dev with Docker Compose
docker-compose up
```
## Architecture
- **Scrapers:** Playwright-based browser automation for each retailer
- **Parsers:** Converts raw receipt data to structured `Purchase` / `PurchaseItem` records
- **Database:** SQLAlchemy 2.0 async; models inlined under `src/receiptwitness/shared/`
- **Events:** Publishes `cartsnitch.receipts.ingested` to Redis after ingestion
## Branches
- `dev` — development, auto-deploys to dev cluster
- `uat` — user acceptance testing
- `main` — production, auto-deploys to prod cluster
+36
View File
@@ -0,0 +1,36 @@
[alembic]
script_location = alembic
sqlalchemy.url = postgresql://localhost:5432/cartsnitch
[loggers]
keys = root,sqlalchemy,alembic
[handlers]
keys = console
[formatters]
keys = generic
[logger_root]
level = WARN
handlers = console
[logger_sqlalchemy]
level = WARN
handlers =
qualname = sqlalchemy.engine
[logger_alembic]
level = INFO
handlers =
qualname = alembic
[handler_console]
class = StreamHandler
args = (sys.stderr,)
level = NOTSET
formatter = generic
[formatter_generic]
format = %(levelname)-5.5s [%(name)s] %(message)s
datefmt = %H:%M:%S
+51
View File
@@ -0,0 +1,51 @@
"""Alembic environment configuration for CartSnitch."""
import os
from logging.config import fileConfig
from sqlalchemy import engine_from_config, pool
from alembic import context
from receiptwitness.shared.models.base import Base
config = context.config
if config.config_file_name is not None:
fileConfig(config.config_file_name)
db_url = os.environ.get("CARTSNITCH_DATABASE_URL_SYNC")
if db_url:
config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%"))
target_metadata = Base.metadata
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The migration context is configured from the ``sqlalchemy.url`` option
    alone; this function does not create an engine or open a connection.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    An engine is built from the ``sqlalchemy.``-prefixed options of the main
    config section, and the migrations run inside a transaction on a
    connection obtained from it.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    # NullPool: the engine keeps no pooled connections.
    engine = engine_from_config(ini_section, prefix="sqlalchemy.", poolclass=pool.NullPool)

    with engine.connect() as connection:
        context.configure(connection=connection, target_metadata=target_metadata)
        with context.begin_transaction():
            context.run_migrations()
if context.is_offline_mode():
run_migrations_offline()
else:
run_migrations_online()
+25
View File
@@ -0,0 +1,25 @@
"""${message}
Revision ID: ${up_revision}
Revises: ${down_revision | comma,n}
Create Date: ${create_date}
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
${imports if imports else ""}
revision: str = ${repr(up_revision)}
down_revision: Union[str, None] = ${repr(down_revision)}
branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
def upgrade() -> None:
${upgrades if upgrades else "pass"}
def downgrade() -> None:
${downgrades if downgrades else "pass"}
@@ -0,0 +1,37 @@
"""Add email_inbound_token to users.
Revision ID: 001_add_email_inbound_token
Revises:
Create Date: 2026-04-02
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "001_add_email_inbound_token"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
    """Add ``users.email_inbound_token``, backfill it, then make it NOT NULL."""
    table, column = "users", "email_inbound_token"

    # The column starts out nullable; rows that already exist have no token yet.
    op.add_column(table, sa.Column(column, sa.String(22), nullable=True))
    op.create_unique_constraint("uq_users_email_inbound_token", table, [column])

    # Backfill existing users with generated tokens (PostgreSQL)
    backfill_sql = (
        "UPDATE users SET email_inbound_token = "
        "substring(replace(gen_random_uuid()::text, '-', ''), 1, 22) "
        "WHERE email_inbound_token IS NULL"
    )
    op.execute(backfill_sql)

    # Every row has a value now, so the column can be made non-nullable.
    op.alter_column(table, column, nullable=False)
def downgrade() -> None:
    """Drop the unique constraint on ``users.email_inbound_token``, then the column."""
    table = "users"
    op.drop_constraint("uq_users_email_inbound_token", table, type_="unique")
    op.drop_column(table, "email_inbound_token")
+2 -5
View File
@@ -8,7 +8,8 @@ version = "0.1.0"
description = "CartSnitch receipt/purchase history ingestion service"
requires-python = ">=3.12"
dependencies = [
"cartsnitch-common>=0.1.0",
"alembic>=1.13,<2.0",
"pydantic[email]>=2.0,<3.0",
"playwright>=1.49,<2.0",
"playwright-stealth>=1.0,<2.0",
"cryptography>=46.0,<47.0",
@@ -50,10 +51,6 @@ strict = false
warn_return_any = true
warn_unused_ignores = true
[[tool.mypy.overrides]]
module = "cartsnitch_common.*"
ignore_missing_imports = true
[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
+2 -2
View File
@@ -7,8 +7,8 @@ from datetime import UTC, datetime
from decimal import Decimal
import redis.asyncio as aioredis
from cartsnitch_common.database import get_async_session_factory
from cartsnitch_common.models.user import User
from receiptwitness.shared.database import get_async_session_factory
from receiptwitness.shared.models import User
from sqlalchemy import select
from receiptwitness.config import settings
+3 -3
View File
@@ -7,9 +7,9 @@ and batch matching for purchase ingestion.
import uuid
from dataclasses import dataclass
from cartsnitch_common.constants import MatchConfidence
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.schemas.purchase import PurchaseItemCreate
from receiptwitness.shared.constants import MatchConfidence
from receiptwitness.shared.models import NormalizedProduct
from receiptwitness.shared.schemas import PurchaseItemCreate
from sqlalchemy.orm import Session
from receiptwitness.pipeline.normalization import (
+1 -1
View File
@@ -10,7 +10,7 @@ import re
from dataclasses import dataclass
from enum import StrEnum
from cartsnitch_common.models.product import NormalizedProduct
from receiptwitness.shared.models import NormalizedProduct
from sqlalchemy import cast, func, select, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Session
+1 -1
View File
@@ -7,7 +7,7 @@ import re
from datetime import date
from decimal import Decimal, InvalidOperation
from cartsnitch_common.schemas.purchase import PurchaseCreate, PurchaseItemCreate
from receiptwitness.shared.schemas import PurchaseCreate, PurchaseItemCreate
def _clean_product_name(raw: str) -> str:
+5
View File
@@ -0,0 +1,5 @@
"""ReceiptWitness shared package — inlined from cartsnitch-common.
This package contains the subset of cartsnitch-common needed by ReceiptWitness,
extracted and made self-contained so receiptwitness has no cross-repo dependency.
"""
+18
View File
@@ -0,0 +1,18 @@
"""Shared configuration for CartSnitch services via pydantic-settings."""
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Environment-driven settings shared by all CartSnitch services."""

    # Values come from CARTSNITCH_-prefixed environment variables and a .env file.
    model_config = SettingsConfigDict(env_file=".env", env_prefix="CARTSNITCH_")

    # Two DSNs for the same database: asyncpg for async use, psycopg2 for sync use.
    database_url: str = "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch"
    database_url_sync: str = "postgresql+psycopg2://cartsnitch:cartsnitch@localhost:5432/cartsnitch"

    redis_url: str = "redis://localhost:6379/0"

    debug: bool = False
    log_level: str = "INFO"
settings = Settings()
+85
View File
@@ -0,0 +1,85 @@
"""Constants and enums shared across CartSnitch services."""
from enum import StrEnum
class StoreSlug(StrEnum):
"""Supported retailer slugs."""
MEIJER = "meijer"
KROGER = "kroger"
TARGET = "target"
class AccountStatus(StrEnum):
"""User store account link status."""
ACTIVE = "active"
EXPIRED = "expired"
ERROR = "error"
class DiscountType(StrEnum):
"""Coupon discount type."""
PERCENT = "percent"
FIXED = "fixed"
BOGO = "bogo"
BUY_X_GET_Y = "buy_x_get_y"
class PriceSource(StrEnum):
"""Source of a price observation."""
RECEIPT = "receipt"
CATALOG = "catalog"
WEEKLY_AD = "weekly_ad"
class EventType(StrEnum):
"""Redis pub/sub event types."""
RECEIPTS_INGESTED = "cartsnitch.receipts.ingested"
PRICES_UPDATED = "cartsnitch.prices.updated"
PRODUCTS_NORMALIZED = "cartsnitch.products.normalized"
COUPONS_UPDATED = "cartsnitch.coupons.updated"
ALERT_PRICE_INCREASE = "cartsnitch.alerts.price_increase"
ALERT_SHRINKFLATION = "cartsnitch.alerts.shrinkflation"
class ProductCategory(StrEnum):
"""Top-level product categories."""
PRODUCE = "produce"
DAIRY = "dairy"
MEAT = "meat"
BAKERY = "bakery"
FROZEN = "frozen"
PANTRY = "pantry"
BEVERAGES = "beverages"
SNACKS = "snacks"
HOUSEHOLD = "household"
PERSONAL_CARE = "personal_care"
OTHER = "other"
class MatchConfidence(StrEnum):
"""Confidence level for product matching."""
HIGH = "high"
MEDIUM = "medium"
LOW = "low"
class SizeUnit(StrEnum):
"""Standardized product size units."""
OZ = "oz"
FL_OZ = "fl_oz"
LB = "lb"
G = "g"
KG = "kg"
ML = "ml"
L = "l"
CT = "ct"
PK = "pk"
+45
View File
@@ -0,0 +1,45 @@
"""Database engine and session factories for sync and async usage."""
from collections.abc import AsyncGenerator, Generator
from sqlalchemy import create_engine
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import Session, sessionmaker
from receiptwitness.shared.config import settings
def get_async_engine(url: str | None = None):
    """Create an async SQLAlchemy engine.

    A falsy ``url`` falls back to ``settings.database_url``; SQL echo follows
    ``settings.debug``. Each call creates a new engine.
    """
    target_url = url or settings.database_url
    return create_async_engine(target_url, echo=settings.debug)
def get_sync_engine(url: str | None = None):
    """Create a sync SQLAlchemy engine.

    A falsy ``url`` falls back to ``settings.database_url_sync``; SQL echo
    follows ``settings.debug``. Each call creates a new engine.
    """
    target_url = url or settings.database_url_sync
    return create_engine(target_url, echo=settings.debug)
def get_async_session_factory(url: str | None = None) -> async_sessionmaker[AsyncSession]:
    """Create an async session factory bound to a newly created async engine.

    Sessions produced by the factory use ``expire_on_commit=False``.
    """
    return async_sessionmaker(
        get_async_engine(url),
        class_=AsyncSession,
        expire_on_commit=False,
    )
def get_sync_session_factory(url: str | None = None) -> sessionmaker[Session]:
    """Create a sync session factory bound to a newly created sync engine.

    Sessions produced by the factory use ``expire_on_commit=False``.
    """
    return sessionmaker(get_sync_engine(url), expire_on_commit=False)
async def get_async_session(url: str | None = None) -> AsyncGenerator[AsyncSession, None]:
    """Dependency that yields one async session.

    The session comes from a factory built for this call and is used as an
    async context manager around the ``yield``.
    """
    session_factory = get_async_session_factory(url)
    async with session_factory() as db_session:
        yield db_session
def get_sync_session(url: str | None = None) -> Generator[Session, None, None]:
    """Dependency that yields one sync session.

    The session comes from a factory built for this call and is used as a
    context manager around the ``yield``.
    """
    session_factory = get_sync_session_factory(url)
    with session_factory() as db_session:
        yield db_session
@@ -0,0 +1,23 @@
"""ReceiptWitness ORM models — inlined from cartsnitch-common."""
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
from receiptwitness.shared.models.product import NormalizedProduct
from receiptwitness.shared.models.user import User, UserStoreAccount
# Stub models — needed for relationship resolution but not directly used by receiptwitness.
# Full definitions live in cartsnitch/common.
from receiptwitness.shared.models.stub_store import Store, StoreLocation
from receiptwitness.shared.models.stub_purchase import Purchase, PurchaseItem
__all__ = [
"Base",
"TimestampMixin",
"UUIDPrimaryKeyMixin",
"NormalizedProduct",
"Purchase",
"PurchaseItem",
"Store",
"StoreLocation",
"User",
"UserStoreAccount",
]
+30
View File
@@ -0,0 +1,30 @@
"""Base model and mixins for all ReceiptWitness ORM models."""
import uuid
from datetime import datetime
from sqlalchemy import DateTime, func
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
class Base(DeclarativeBase):
    """Declarative base that every ReceiptWitness ORM model derives from."""
class TimestampMixin:
    """Mixin that adds ``created_at`` and ``updated_at`` columns."""

    # Set by the database (now()) when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        nullable=False,
    )
    # Same insert-time default, plus now() again on every ORM update.
    updated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        server_default=func.now(),
        onupdate=func.now(),
        nullable=False,
    )
class UUIDPrimaryKeyMixin:
    """Mixin that adds a UUID primary key named ``id``."""

    # Generated client-side by uuid4, with gen_random_uuid() as the database-side default.
    id: Mapped[uuid.UUID] = mapped_column(
        primary_key=True,
        default=uuid.uuid4,
        server_default=func.gen_random_uuid(),
    )
@@ -0,0 +1,26 @@
"""NormalizedProduct model — the canonical product identity."""
from typing import TYPE_CHECKING
from sqlalchemy import JSON, String
from sqlalchemy.orm import Mapped, mapped_column
from receiptwitness.shared.constants import ProductCategory, SizeUnit
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
if TYPE_CHECKING:
pass
class NormalizedProduct(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Canonical product identity used to match products across retailers."""

    __tablename__ = "normalized_products"

    canonical_name: Mapped[str] = mapped_column(String(300), nullable=False)

    # Classification; both columns are stored as plain strings.
    category: Mapped[ProductCategory | None] = mapped_column(String(50))
    subcategory: Mapped[str | None] = mapped_column(String(100))

    brand: Mapped[str | None] = mapped_column(String(200))

    # Package size and its unit
    size: Mapped[str | None] = mapped_column(String(50))
    size_unit: Mapped[SizeUnit | None] = mapped_column(String(10))

    # JSON list of UPC strings; a new row defaults to an empty list.
    upc_variants: Mapped[list[str] | None] = mapped_column(JSON, default=list)
@@ -0,0 +1,64 @@
"""Stub Purchase and PurchaseItem models.
These are minimal stubs of the full cartsnitch-common Purchase/PurchaseItem models.
They exist solely to satisfy SQLAlchemy relationship resolution for User and
UserStoreAccount. The canonical definitions live in cartsnitch/common.
"""
import uuid
from datetime import date, datetime
from decimal import Decimal
from sqlalchemy import JSON, Date, DateTime, ForeignKey, Index, Numeric, String, UniqueConstraint, func
from sqlalchemy.orm import Mapped, mapped_column
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
class Purchase(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: one shopping trip / receipt. Full definition in cartsnitch/common."""

    __tablename__ = "purchases"

    # Owner and store references
    user_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False)
    store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
    store_location_id: Mapped[uuid.UUID | None] = mapped_column(ForeignKey("store_locations.id"))

    # Receipt identity; unique per (user, store), see __table_args__
    receipt_id: Mapped[str] = mapped_column(String(200), nullable=False)
    purchase_date: Mapped[date] = mapped_column(Date, nullable=False)

    # Monetary totals
    total: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False)
    subtotal: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))
    tax: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))
    savings_total: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))

    # Provenance
    source_url: Mapped[str | None] = mapped_column(String(500))
    raw_data: Mapped[dict | None] = mapped_column(JSON)
    ingested_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), server_default=func.now(), nullable=False
    )

    __table_args__ = (
        Index("ix_purchases_user_store", "user_id", "store_id"),
        UniqueConstraint("user_id", "store_id", "receipt_id", name="uq_purchase_receipt"),
    )
class PurchaseItem(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: one line item of a receipt. Full definition in cartsnitch/common."""

    __tablename__ = "purchase_items"

    purchase_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("purchases.id"), nullable=False)

    # Product as it appears on the receipt
    product_name_raw: Mapped[str] = mapped_column(String(300), nullable=False)
    upc: Mapped[str | None] = mapped_column(String(20))

    # Quantity (three decimal places) and prices (two decimal places)
    quantity: Mapped[Decimal] = mapped_column(Numeric(10, 3), nullable=False, default=1)
    unit_price: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False)
    extended_price: Mapped[Decimal] = mapped_column(Numeric(10, 2), nullable=False)
    regular_price: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))
    sale_price: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))
    coupon_discount: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))
    loyalty_discount: Mapped[Decimal | None] = mapped_column(Numeric(10, 2))

    category_raw: Mapped[str | None] = mapped_column(String(100))

    # Optional link to the canonical product
    normalized_product_id: Mapped[uuid.UUID | None] = mapped_column(
        ForeignKey("normalized_products.id"),
    )
@@ -0,0 +1,39 @@
"""Stub Store and StoreLocation models.
These are minimal stubs of the full cartsnitch-common Store/StoreLocation models.
They exist solely to satisfy SQLAlchemy relationship resolution for User and
UserStoreAccount. The canonical definitions live in cartsnitch/common.
"""
import uuid
from sqlalchemy import Float, ForeignKey, String
from sqlalchemy.orm import Mapped, mapped_column, relationship
from receiptwitness.shared.constants import StoreSlug
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
class Store(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: a canonical retailer. Full definition in cartsnitch/common."""

    __tablename__ = "stores"

    name: Mapped[str] = mapped_column(String(100), nullable=False)
    # Unique slug, stored as a plain string column
    slug: Mapped[StoreSlug] = mapped_column(String(20), unique=True, nullable=False)

    logo_url: Mapped[str | None] = mapped_column(String(500))
    website_url: Mapped[str | None] = mapped_column(String(500))
class StoreLocation(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Stub: a physical location of a store. Full definition in cartsnitch/common."""

    __tablename__ = "store_locations"

    store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)

    # Postal address
    address: Mapped[str] = mapped_column(String(300), nullable=False)
    city: Mapped[str] = mapped_column(String(100), nullable=False)
    state: Mapped[str] = mapped_column(String(2), nullable=False)
    zip: Mapped[str] = mapped_column(String(10), nullable=False)

    # Optional coordinates
    lat: Mapped[float | None] = mapped_column(Float)
    lng: Mapped[float | None] = mapped_column(Float)
+63
View File
@@ -0,0 +1,63 @@
"""User and UserStoreAccount models."""
import secrets
import uuid
from datetime import datetime
from typing import TYPE_CHECKING
from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint, text
from sqlalchemy.orm import Mapped, mapped_column, relationship
from receiptwitness.shared.constants import AccountStatus
from receiptwitness.shared.models.base import Base, TimestampMixin, UUIDPrimaryKeyMixin
if TYPE_CHECKING:
from receiptwitness.shared.models.stub_purchase import Purchase
from receiptwitness.shared.models.stub_store import Store
class User(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Application user."""

    __tablename__ = "users"

    email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)

    # Unique per-user token. Generated in Python with token_urlsafe(16), which
    # yields 22 URL-safe characters, with a SQL expression as server default.
    email_inbound_token: Mapped[str] = mapped_column(
        String(22),
        nullable=False,
        unique=True,
        default=lambda: secrets.token_urlsafe(16),
        server_default=text(
            "replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_')"
        ),
    )

    hashed_password: Mapped[str | None] = mapped_column(String(255), nullable=True)
    display_name: Mapped[str | None] = mapped_column(String(100))
    email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="false")
    image: Mapped[str | None] = mapped_column(Text, nullable=True)

    # Relationships
    store_accounts: Mapped[list["UserStoreAccount"]] = relationship(back_populates="user")
    # One-directional on purpose: the stub Purchase model in this package has
    # no "user" relationship, and naming a missing attribute in back_populates
    # makes SQLAlchemy's mapper configuration fail. The join is derived from
    # the purchases.user_id foreign key.
    purchases: Mapped[list["Purchase"]] = relationship()
class UserStoreAccount(UUIDPrimaryKeyMixin, TimestampMixin, Base):
    """Link between a user and their retailer account credentials."""

    __tablename__ = "user_store_accounts"
    # One account link per (user, store) pair.
    __table_args__ = (UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),)

    user_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("users.id"), nullable=False)
    store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)

    # WARNING: Contains retailer session cookies/tokens. Encryption-at-rest
    # required before production deployment (e.g., pgcrypto or app-level encryption).
    session_data: Mapped[dict | None] = mapped_column(JSON)
    session_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    status: Mapped[AccountStatus] = mapped_column(
        String(20), nullable=False, default=AccountStatus.ACTIVE
    )

    # Relationships
    user: Mapped["User"] = relationship(back_populates="store_accounts")
    # One-directional on purpose: the stub Store model in this package has no
    # "user_accounts" relationship, and naming a missing attribute in
    # back_populates makes SQLAlchemy's mapper configuration fail. The join is
    # derived from the store_id foreign key.
    store: Mapped["Store"] = relationship()
@@ -0,0 +1,5 @@
"""ReceiptWitness Pydantic schemas — inlined from cartsnitch-common."""
from receiptwitness.shared.schemas.purchase import PurchaseCreate, PurchaseItemCreate
__all__ = ["PurchaseCreate", "PurchaseItemCreate"]
@@ -0,0 +1,73 @@
"""Purchase and PurchaseItem Pydantic schemas."""
import uuid
from datetime import date, datetime
from decimal import Decimal
from pydantic import BaseModel
class PurchaseItemCreate(BaseModel):
    # Input schema for one receipt line item. Only the raw product name, the
    # unit price and the extended price are required.
    product_name_raw: str
    upc: str | None = None

    quantity: Decimal = Decimal("1")
    unit_price: Decimal
    extended_price: Decimal

    # Optional price breakdown
    regular_price: Decimal | None = None
    sale_price: Decimal | None = None
    coupon_discount: Decimal | None = None
    loyalty_discount: Decimal | None = None

    category_raw: str | None = None
    normalized_product_id: uuid.UUID | None = None
class PurchaseItemRead(BaseModel):
    # Output schema for one receipt line item; from_attributes allows building
    # it from an object's attributes rather than a dict.
    model_config = {"from_attributes": True}

    id: uuid.UUID
    purchase_id: uuid.UUID

    product_name_raw: str
    upc: str | None

    quantity: Decimal
    unit_price: Decimal
    extended_price: Decimal

    # Price breakdown; every field must be supplied, None is allowed
    regular_price: Decimal | None
    sale_price: Decimal | None
    coupon_discount: Decimal | None
    loyalty_discount: Decimal | None

    category_raw: str | None
    normalized_product_id: uuid.UUID | None
class PurchaseCreate(BaseModel):
    # Input schema for one purchase (receipt) together with its line items.
    user_id: uuid.UUID
    store_id: uuid.UUID
    store_location_id: uuid.UUID | None = None

    receipt_id: str
    purchase_date: date

    # Monetary totals; only the grand total is required
    total: Decimal
    subtotal: Decimal | None = None
    tax: Decimal | None = None
    savings_total: Decimal | None = None

    source_url: str | None = None
    raw_data: dict | None = None

    items: list[PurchaseItemCreate] = []
class PurchaseRead(BaseModel):
    # Output schema for one purchase; from_attributes allows building it from
    # an object's attributes rather than a dict.
    model_config = {"from_attributes": True}

    id: uuid.UUID
    user_id: uuid.UUID
    store_id: uuid.UUID
    store_location_id: uuid.UUID | None

    receipt_id: str
    purchase_date: date

    # Monetary totals
    total: Decimal
    subtotal: Decimal | None
    tax: Decimal | None
    savings_total: Decimal | None

    source_url: str | None

    # Timestamps
    ingested_at: datetime
    created_at: datetime
    updated_at: datetime
+2 -2
View File
@@ -3,8 +3,8 @@
import asyncio
import logging
from cartsnitch_common.database import get_async_session_factory
from cartsnitch_common.models.user import User
from receiptwitness.shared.database import get_async_session_factory
from receiptwitness.shared.models import User
from sqlalchemy import select
from receiptwitness.config import settings
+1 -1
View File
@@ -1,7 +1,7 @@
"""Shared test fixtures for pipeline tests."""
import pytest
from cartsnitch_common.models.base import Base
from receiptwitness.shared.models import Base
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
+3 -3
View File
@@ -4,9 +4,9 @@ import uuid
from datetime import UTC, datetime
from decimal import Decimal
from cartsnitch_common.constants import MatchConfidence
from cartsnitch_common.models.product import NormalizedProduct
from cartsnitch_common.schemas.purchase import PurchaseItemCreate
from receiptwitness.shared.constants import MatchConfidence
from receiptwitness.shared.models import NormalizedProduct
from receiptwitness.shared.schemas import PurchaseItemCreate
from receiptwitness.pipeline.matching import (
ProductMatcher,
+1 -1
View File
@@ -3,7 +3,7 @@
import uuid
from datetime import UTC, datetime
from cartsnitch_common.models.product import NormalizedProduct
from receiptwitness.shared.models import NormalizedProduct
from receiptwitness.pipeline.normalization import (
MatchMethod,