Compare commits

...

38 Commits

Author SHA1 Message Date
CartSnitch Engineer Bot d0c31ffc26 Merge main into fix/npm-audit-vulnerabilities 2026-04-03 13:17:56 +00:00
cartsnitch-qa[bot] c8de30ec6e Merge pull request #107 from cartsnitch/fix/inbound-email-500
fix: move email-in-address endpoint from /auth to /api/v1 prefix
2026-04-03 12:39:22 +00:00
Paperclip 5e763bcb6d fix(deps): resolve npm audit vulnerabilities (brace-expansion, lodash)
- Override brace-expansion to >=1.1.13 to resolve GHSA-f886-m6hf-6m8v
- Override lodash to >=4.17.24 to resolve GHSA-r5fr-rjxr-66jc and GHSA-f23m-r3pf-42rh
- Override minimatch to ^10.2.4 to maintain compatibility with brace-expansion@5.x

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 12:35:51 +00:00
CartSnitch Engineer Bot c1dc3e77e0 fix(receiptwitness): handle invalid timestamp in Mailgun webhook verification
Wrap int(timestamp) in try/except to return False instead of raising
ValueError on empty/invalid timestamp, which was causing a 500 error
instead of the intended 406.

Also add tests for empty timestamp (→ 406) and GET /inbound/email (→ 405).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 12:09:51 +00:00
CartSnitch Engineer Bot 1af98c40ab fix: move email-in-address endpoint from /auth to /api/v1 prefix
The GET /me/email-in-address endpoint was unreachable because the Gateway
HTTPRoute routes all /auth/* traffic to Better-Auth (port 3001), not the
API service. This change:
- Moves the endpoint from the /auth router to a new /api/v1/me/ router
- Adds EmailInAddressResponse schema and get_email_in_address service method
- Updates Settings.tsx to call /api/v1/me/email-in-address

Fixes CAR-445.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 11:44:31 +00:00
cartsnitch-ceo[bot] 1aaa8e78fd feat(frontend): show email-in address on Settings page (#103)
feat(frontend): show email-in address on Settings page
2026-04-03 11:27:58 +00:00
cartsnitch-qa[bot] c3bfd3560b Merge branch 'main' into feat/email-in-settings 2026-04-03 11:25:04 +00:00
cartsnitch-ceo[bot] de2407d985 Merge pull request #105 from cartsnitch/sync/api-2026-04-03
fix(api): revert auth/type regressions from standalone sync, keep email-in feature only
2026-04-03 10:38:35 +00:00
CartSnitch Engineer Bot d52fb83296 fix(frontend): correct email-in-address fetch URL to /auth prefix
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 10:32:32 +00:00
CartSnitch Engineer Bot c855575e77 fix(api): restore /api/v1 prefix on data routers
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 10:15:21 +00:00
CartSnitch Engineer Bot 7c45b04dce feat(frontend): show email-in address on Settings page with copy button
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 09:45:45 +00:00
CartSnitch Engineer Bot f721918f95 fix(api): revert auth/type regressions from standalone sync, keep email-in feature only
- Revert auth/dependencies.py to cookie+Bearer dual auth with str user IDs
- Add GET /auth/me/email-in-address endpoint for receipt email routing
- Update User model: add email_inbound_token, change id/store_id/user_id to str
- Update AuthService and UserResponse to use str user IDs
- Update route count test: 33 -> 34 routes
- Restore e2e test for email-in-address endpoint

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 09:40:39 +00:00
CartSnitch Engineer Bot 692f42fbbb fix(auth): revert to Better-Auth session-cookie auth, preserve email-in feature
- Revert auth/dependencies.py, auth/routes.py, services/auth.py, schemas.py
  to Better-Auth session-cookie auth (removed JWT register/login/refresh)
- Preserve GET /auth/me/email-in-address endpoint
- Fix UUIDString TypeDecorator: process_result_value returns uuid.UUID
  (not str) so SQLAlchemy 2.0 sentinel tracking matches UUID-to-UUID
- Fix seed_data fixture: look up real user_id from session token via
  sessions table; purchases now reference actual user FK
- Update purchase_data fixture to use session-cookie auth
- Update test_auth_endpoints, test_auth_validation to cookie-based tests
- Remove TestRegistrationErrors and TestLoginErrors (no longer applicable)
- Update test_openapi.py expected routes and count
- Update test_error_handler.py to use PATCH /auth/me validation

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 09:15:00 +00:00
cartsnitch-ceo[bot] b95f1725c7 sync(receiptwitness): copy latest standalone code to monorepo
Merging per SDLC workflow. QA approved (Checkout Charlie), CTO approved (Savannah Savings), CI green. Pre-existing audit failure acknowledged. CAR-423.
2026-04-03 08:14:49 +00:00
Barcode Betty 70b9d1d6d6 sync(api): copy latest standalone code and merge alembic migrations
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 07:54:31 +00:00
Barcode Betty f36429936a sync(receiptwitness): copy latest standalone code to monorepo
Syncs receiptwitness standalone repo code into monorepo subdirectory.
Includes email parsing, notifications, queue, and worker modules.
Keeps monorepo Dockerfile (uses local common/).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-03 07:41:00 +00:00
cartsnitch-ceo[bot] 1b418e7c6f fix(api): change purchased_at and expires_at schema types from datetime to date
fix(api): change purchased_at and expires_at schema types from datetime to date
2026-04-01 23:56:49 +00:00
cartsnitch-ceo[bot] 0b31badbcd Merge branch 'main' into fix/api-date-schema-types 2026-04-01 20:56:13 +00:00
cartsnitch-ceo[bot] eb579dcaa5 fix(frontend): remove hardcoded mock product IDs from Dashboard price trends
fix(frontend): remove hardcoded mock product IDs from Dashboard price trends
2026-04-01 20:25:19 +00:00
cartsnitch-ceo[bot] 086868d450 Merge branch 'main' into fix/dashboard-hardcoded-product-ids 2026-04-01 20:19:22 +00:00
CartSnitch Engineer Bot 63621df0b8 fix(frontend): remove unused React import from Dashboard.tsx
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 19:58:41 +00:00
cartsnitch-engineer[bot] 41e6bfdcf5 feat(scripts): add dev environment seed script and K8s Job (#99)
* fix(api): replace UUID type with str for Better-Auth nanoid user IDs

Better-Auth uses nanoid strings for user IDs, not UUIDs. Changed all
user_id parameter/return types in the API layer from UUID to str,
removed the obsolete UUID import where unused, and updated the
_validate_session_token return type accordingly.

Co-Authored-By: Paperclip <noreply@paperclip.ing>

* feat(scripts): add dev environment seed script and K8s Job

Co-Authored-By: Paperclip <noreply@paperclip.ing>

---------

Co-authored-by: CartSnitch Engineer Bot <cartnoreply@cartsnitch.com>
Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-04-01 19:51:45 +00:00
CartSnitch Engineer Bot a60859f22f fix(frontend): remove unused React import from Dashboard.tsx
Removes the unused `import React from 'react'` line from Dashboard.tsx
to resolve TS6133 error in lighthouse CI. No other code in the file
references the React namespace.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 19:49:14 +00:00
CartSnitch Engineer Bot 8e8d4a4774 fix(api): change purchased_at and expires_at schema types from datetime to date
Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 19:41:35 +00:00
CartSnitch Engineer Bot e85d757cc6 fix(frontend): remove hardcoded mock product IDs from Dashboard price trends
Removed usePriceHistory calls with hardcoded string product IDs (prod1, prod10)
that caused 422 errors against the UUID-expecting API. The Price Trends section
now shows a placeholder until a proper featured-products endpoint is available.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 19:37:44 +00:00
CartSnitch Engineer Bot 43cb62a4d6 fix(api): remove TimestampMixin from models whose DB tables lack timestamp columns
Remove TimestampMixin (created_at/updated_at) from Purchase, PurchaseItem,
PriceHistory, Coupon, and ShrinkflationEvent models since their PostgreSQL
tables do not have those columns. This was causing 500 errors on
/api/v1/purchases and /api/v1/purchases/stats.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 19:36:21 +00:00
cartsnitch-engineer[bot] f7e1574176 fix(api): replace UUID type with str for Better-Auth nanoid user IDs (#98)
Better-Auth uses nanoid strings for user IDs, not UUIDs. Changed all
user_id parameter/return types in the API layer from UUID to str,
removed the obsolete UUID import where unused, and updated the
_validate_session_token return type accordingly.

Co-authored-by: CartSnitch Engineer Bot <cartnoreply@cartsnitch.com>
Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-04-01 19:15:58 +00:00
cartsnitch-ceo[bot] ee6352a2f5 fix(api): parse signed session cookie instead of SHA-256 hashing
fix(api): parse signed session cookie instead of SHA-256 hashing
2026-04-01 11:34:59 +00:00
CartSnitch Engineer Bot 2f37f0501f fix(api): parse signed session cookie instead of SHA-256 hashing
Better-Auth v1.5.6 stores raw tokens in sessions.token, not SHA-256
hashes. The session cookie is signed (rawToken.hmacSignature), so
strip the HMAC signature suffix before querying the DB.

Fixes 401 errors on all data endpoints caused by the incorrect hash.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 11:09:29 +00:00
cartsnitch-cto[bot] 4c36fd4156 fix(api): restore SHA-256 session token hashing (regression from PR #95)
Restores sha256 import and token hashing in _validate_session_token.

Regression introduced when PR #95 (cookie name fix) was merged without
the hash fix from PR #93.

QA approved: CAR-324 (Checkout Charlie)
CTO approved: Paperclip (Savannah Savings)
Resolves CAR-323

cc @cpfarhood
2026-04-01 10:29:05 +00:00
cartsnitch-ceo[bot] c9172f088f fix(api): read __Secure- prefixed session cookie for HTTPS environments
Merges fix/secure-cookie-name. Resolves CAR-321.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 08:16:41 +00:00
cartsnitch-engineer[bot] ac4cba2b0d fix(api): read __Secure- prefixed session cookie for HTTPS environments
Better-Auth automatically prefixes cookie names with __Secure- when serving
over HTTPS. The API gateway now tries __Secure-better-auth.session_token
first (HTTPS/deployed), falling back to better-auth.session_token (HTTP/local dev).

Fixes CAR-321.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 04:02:49 +00:00
cartsnitch-cto[bot] 0c47be8ef3 fix(frontend): align API route paths with backend (alerts, price-history)
CEO merge: QA approved (cartsnitch-qa[bot]), CTO approved (cartsnitch-cto[bot]), CI green. Merging per SDLC gatekeeper role.
2026-04-01 03:13:01 +00:00
cartsnitch-cto[bot] 440f92e96e Merge branch 'main' into fix/frontend-api-routes 2026-04-01 03:08:44 +00:00
cartsnitch-ceo[bot] 97bbdf68a5 fix(api): hash session token before DB lookup to match Better-Auth storage
fix(api): hash session token before DB lookup to match Better-Auth storage
2026-04-01 02:49:07 +00:00
CartSnitch Engineer Bot 02e5bee390 fix(frontend): align API route paths with backend (alerts, price-history)
Change frontend to call /alerts (was /price-alerts) and /products/{id}/prices
(was /products/{id}/price-history) to match the backend router mounts.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 02:10:12 +00:00
CartSnitch Engineer Bot d475b3876a fix(api): hash session token before DB lookup to match Better-Auth storage
Better-Auth v1.5.6+ stores session tokens as SHA-256 hashes in the
sessions table. The raw token from the cookie was being queried directly,
causing all authenticated /api/v1/* requests to return 401.

Fixes CAR-313.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 02:09:55 +00:00
cartsnitch-qa[bot] 76bcc53992 fix(api): mount data routers under /api/v1 prefix
Merges fix for CAR-310 / CAR-161 UAT failure. QA approved, CTO approved, CI green.
2026-04-01 01:50:20 +00:00
54 changed files with 3225 additions and 728 deletions
+164
View File
@@ -0,0 +1,164 @@
name: CI
on:
push:
branches: [main]
pull_request:
branches: [main]
concurrency:
group: ci-${{ github.ref }}
cancel-in-progress: true
permissions:
contents: write
packages: write
env:
REGISTRY: ghcr.io
IMAGE_NAME: cartsnitch/api
jobs:
lint:
runs-on: runners-cartsnitch
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
- run: pip install ruff
- name: Ruff lint
run: ruff check .
- name: Ruff format check
run: ruff format --check .
typecheck:
runs-on: runners-cartsnitch
continue-on-error: true
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libpq-dev build-essential
- name: Install cartsnitch-common from GitHub
run: pip install "cartsnitch-common @ git+https://github.com/cartsnitch/common.git"
- run: pip install -e ".[dev]" mypy
- name: Type check
run: mypy src/cartsnitch_api
test:
runs-on: runners-cartsnitch
services:
postgres:
image: postgres:15-alpine
credentials:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
env:
POSTGRES_USER: cartsnitch
POSTGRES_PASSWORD: cartsnitch_test
POSTGRES_DB: cartsnitch_test
ports:
- 5432:5432
options: >-
--health-cmd pg_isready
--health-interval 10s
--health-timeout 5s
--health-retries 5
redis:
image: redis:7-alpine
credentials:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
ports:
- 6379:6379
options: >-
--health-cmd "redis-cli ping"
--health-interval 10s
--health-timeout 5s
--health-retries 5
env:
CARTSNITCH_DATABASE_URL: postgresql+asyncpg://cartsnitch:cartsnitch_test@localhost:5432/cartsnitch_test
CARTSNITCH_REDIS_URL: redis://localhost:6379/0
CARTSNITCH_JWT_SECRET_KEY: test-secret-do-not-use-in-prod
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12"
cache: pip
- name: Install system dependencies
run: sudo apt-get update && sudo apt-get install -y libpq-dev build-essential
- name: Install cartsnitch-common from GitHub
run: pip install "cartsnitch-common @ git+https://github.com/cartsnitch/common.git"
- run: pip install -e ".[dev]"
- name: Run tests
run: pytest --tb=short -q
build-and-push:
runs-on: runners-cartsnitch
needs: [lint, test]
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Generate CalVer tag
id: calver
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: |
DATE_TAG=$(date -u +%Y.%m.%d)
EXISTING=$(git tag -l "v${DATE_TAG}*" | sort -V | tail -1)
if [ -z "$EXISTING" ]; then
VERSION="$DATE_TAG"
elif [ "$EXISTING" = "v${DATE_TAG}" ]; then
VERSION="${DATE_TAG}.2"
else
BUILD_NUM=$(echo "$EXISTING" | sed "s/v${DATE_TAG}\.//")
VERSION="${DATE_TAG}.$((BUILD_NUM + 1))"
fi
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
echo "CalVer tag: $VERSION"
- name: Log in to Docker Hub
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Log in to GHCR
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract metadata
id: meta
uses: docker/metadata-action@v5
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=sha,prefix=sha-
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
target: prod
- name: Create git tag
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: |
git tag "v${{ steps.calver.outputs.version }}"
git push origin "v${{ steps.calver.outputs.version }}"
+4 -11
View File
@@ -1,5 +1,3 @@
# Stage 1: Build dependencies
# Build context is the repo root. Paths below are relative to the root.
FROM python:3.12-slim AS build
RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -8,21 +6,16 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY api/pyproject.toml ./
COPY api/src/ ./src/
COPY pyproject.toml ./
COPY src/ ./src/
RUN pip install --no-cache-dir --prefix=/install .
# Stage 2: Production image
FROM python:3.12-slim AS prod
RUN apt-get update && apt-get install -y --no-install-recommends libpq5 && rm -rf /var/lib/apt/lists/*
WORKDIR /app
RUN adduser --system --group --uid 1000 app
COPY --from=build /install /usr/local
COPY api/src/ ./src/
COPY api/alembic.ini ./
COPY api/alembic/ ./alembic/
COPY src/ ./src/
USER 1000
EXPOSE 8000
@@ -30,4 +23,4 @@ EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=3s \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
CMD ["sh", "-c", "python -m alembic upgrade head && uvicorn cartsnitch_api.main:app --host 0.0.0.0 --port 8000"]
CMD ["uvicorn", "cartsnitch_api.main:app", "--host", "0.0.0.0", "--port", "8000"]
@@ -0,0 +1,49 @@
"""Add email_inbound_token to users.
Revision ID: 005_add_email_inbound_token
Revises: 004_fix_user_id_text
Create Date: 2026-04-02
"""
import secrets
import sqlalchemy as sa
from alembic import op
revision = "005_add_email_inbound_token"
down_revision = "004_fix_user_id_text"
branch_labels = None
depends_on = None
def upgrade() -> None:
# Add column nullable first so existing rows can be backfilled
op.add_column(
"users",
sa.Column("email_inbound_token", sa.String(22), nullable=True),
)
# Backfill existing users with unique tokens
connection = op.get_bind()
result = connection.execute(sa.text("SELECT id FROM users WHERE email_inbound_token IS NULL"))
for (user_id,) in result:
token = secrets.token_urlsafe(16)
connection.execute(
sa.text("UPDATE users SET email_inbound_token = :token WHERE id = :id"),
{"token": token, "id": user_id},
)
# Now enforce non-null and unique
op.alter_column("users", "email_inbound_token", nullable=False)
op.create_index(
"ix_users_email_inbound_token",
"users",
["email_inbound_token"],
unique=True,
)
def downgrade() -> None:
op.drop_index("ix_users_email_inbound_token", table_name="users")
op.drop_column("users", "email_inbound_token")
+4 -6
View File
@@ -5,8 +5,6 @@ Sessions are verified by querying the shared sessions table directly.
"""
from datetime import UTC, datetime
from uuid import UUID
from fastapi import Cookie, Depends, Header, HTTPException, Request, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from sqlalchemy import text
@@ -23,10 +21,10 @@ bearer_scheme = HTTPBearer(auto_error=False)
SESSION_COOKIE_NAME = "better-auth.session_token"
async def _validate_session_token(token: str, db: AsyncSession) -> UUID:
async def _validate_session_token(token: str, db: AsyncSession) -> str:
"""Validate a Better-Auth session token against the sessions table.
Returns the user_id (as UUID) if the session is valid and not expired.
Returns the user_id (as str) if the session is valid and not expired.
"""
result = await db.execute(
text("SELECT user_id, expires_at FROM sessions WHERE token = :token"),
@@ -51,14 +49,14 @@ async def _validate_session_token(token: str, db: AsyncSession) -> UUID:
detail="Session expired",
)
return UUID(str(user_id))
return str(user_id)
async def get_current_user(
request: Request,
credentials: HTTPAuthorizationCredentials | None = Depends(bearer_scheme),
db: AsyncSession = Depends(get_db),
) -> UUID:
) -> str:
"""Extract and validate the session token from cookie or Authorization header.
Checks in order:
+31 -5
View File
@@ -5,13 +5,14 @@ the Better-Auth service (auth/). This router provides user profile
endpoints that query our own user data from the shared database.
"""
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from cartsnitch_api.auth.dependencies import get_current_user
from cartsnitch_api.database import get_db
from cartsnitch_api.models import User
from cartsnitch_api.schemas import (
UpdateUserRequest,
UserResponse,
@@ -21,9 +22,14 @@ from cartsnitch_api.services.auth import AuthService
router = APIRouter(prefix="/auth", tags=["auth"])
class EmailInAddressResponse(BaseModel):
email_address: str
instructions: str
@router.get("/me", response_model=UserResponse)
async def get_me(
user_id: UUID = Depends(get_current_user),
user_id: str = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
@@ -38,7 +44,7 @@ async def get_me(
@router.patch("/me", response_model=UserResponse)
async def update_me(
body: UpdateUserRequest,
user_id: UUID = Depends(get_current_user),
user_id: str = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
@@ -54,7 +60,7 @@ async def update_me(
@router.delete("/me", status_code=status.HTTP_204_NO_CONTENT)
async def delete_me(
user_id: UUID = Depends(get_current_user),
user_id: str = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
@@ -64,3 +70,23 @@ async def delete_me(
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail="User not found"
) from None
@router.get("/me/email-in-address", response_model=EmailInAddressResponse)
async def get_email_in_address(
user_id: str = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
result = await db.execute(select(User.email_inbound_token).where(User.id == user_id))
token = result.scalar_one_or_none()
if not token:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail="Email inbound token not found"
) from None
return EmailInAddressResponse(
email_address=f"receipts+{token}@receipts.cartsnitch.com",
instructions=(
"Forward your digital receipt emails to this address. "
"We currently support Meijer, Kroger, and Target receipt emails."
),
)
+2
View File
@@ -18,6 +18,7 @@ from cartsnitch_api.routes.purchases import router as purchases_router
from cartsnitch_api.routes.scraping import router as scraping_router
from cartsnitch_api.routes.shopping import router as shopping_router
from cartsnitch_api.routes.stores import router as stores_router
from cartsnitch_api.routes.user import router as user_router
@asynccontextmanager
@@ -49,6 +50,7 @@ def create_app() -> FastAPI:
# Data endpoints mounted under /api/v1
v1_router = APIRouter(prefix="/api/v1")
v1_router.include_router(user_router)
v1_router.include_router(stores_router)
v1_router.include_router(purchases_router)
v1_router.include_router(products_router)
+1 -1
View File
@@ -33,7 +33,7 @@ class Purchase(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "purchases"
user_id: Mapped[str] = mapped_column(ForeignKey("users.id"), nullable=False)
store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
store_id: Mapped[str] = mapped_column(ForeignKey("stores.id"), nullable=False)
store_location_id: Mapped[uuid.UUID | None] = mapped_column(ForeignKey("store_locations.id"))
receipt_id: Mapped[str] = mapped_column(String(200), nullable=False)
purchase_date: Mapped[date] = mapped_column(Date, nullable=False)
+8 -2
View File
@@ -1,6 +1,6 @@
"""User and UserStoreAccount models."""
import uuid
import secrets
from datetime import datetime
from typing import TYPE_CHECKING
@@ -25,6 +25,12 @@ class User(TimestampMixin, Base):
email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
hashed_password: Mapped[str] = mapped_column(String(255), nullable=False)
display_name: Mapped[str | None] = mapped_column(String(100))
email_inbound_token: Mapped[str] = mapped_column(
String(22),
nullable=False,
unique=True,
default=lambda: secrets.token_urlsafe(16),
)
# Relationships
store_accounts: Mapped[list["UserStoreAccount"]] = relationship(back_populates="user")
@@ -38,7 +44,7 @@ class UserStoreAccount(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__table_args__ = (UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),)
user_id: Mapped[str] = mapped_column(ForeignKey("users.id"), nullable=False)
store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
store_id: Mapped[str] = mapped_column(ForeignKey("stores.id"), nullable=False)
session_data: Mapped[dict | None] = mapped_column(EncryptedJSON)
session_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
+26
View File
@@ -0,0 +1,26 @@
"""User routes: per-user account endpoints (email-in address, etc.)."""
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from cartsnitch_api.auth.dependencies import get_current_user
from cartsnitch_api.database import get_db
from cartsnitch_api.schemas import EmailInAddressResponse
from cartsnitch_api.services.auth import AuthService
router = APIRouter(tags=["user"])
@router.get("/me/email-in-address", response_model=EmailInAddressResponse)
async def get_email_in_address(
user_id: str = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
try:
email_address = await svc.get_email_in_address(user_id)
return EmailInAddressResponse(email_address=email_address)
except LookupError:
raise HTTPException(
status_code=status.HTTP_404_NOT_FOUND, detail="User not found"
) from None
+4
View File
@@ -22,6 +22,10 @@ class UserResponse(BaseModel):
created_at: datetime
class EmailInAddressResponse(BaseModel):
email_address: str
# ---------- Stores ----------
+14 -5
View File
@@ -5,8 +5,6 @@ handled by the Better-Auth service (auth/). This service provides
user lookup and profile update operations for the API gateway.
"""
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -15,7 +13,7 @@ class AuthService:
def __init__(self, db: AsyncSession) -> None:
self.db = db
async def get_user(self, user_id: UUID) -> dict:
async def get_user(self, user_id: str) -> dict:
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
@@ -30,7 +28,7 @@ class AuthService:
"created_at": user.created_at,
}
async def update_user(self, user_id: UUID, **fields) -> dict:
async def update_user(self, user_id: str, **fields) -> dict:
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
@@ -58,7 +56,7 @@ class AuthService:
"created_at": user.created_at,
}
async def delete_user(self, user_id: UUID) -> None:
async def delete_user(self, user_id: str) -> None:
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
@@ -68,3 +66,14 @@ class AuthService:
await self.db.delete(user)
await self.db.commit()
async def get_email_in_address(self, user_id: str) -> str:
"""Return the per-user email-in address for receipt forwarding."""
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
user = result.scalar_one_or_none()
if not user:
raise LookupError("User not found")
return f"{user.email_inbound_token}@email.cartsnitch.com"
@@ -0,0 +1,61 @@
"""Tests for GET /auth/me/email-in-address endpoint."""
import pytest
from httpx import AsyncClient
@pytest.mark.asyncio
async def test_get_email_in_address_authenticated(client: AsyncClient, auth_headers: dict):
"""Authenticated user gets their email-in address."""
response = await client.get(
"/auth/me/email-in-address",
headers=auth_headers,
)
assert response.status_code == 200
data = response.json()
assert "email_address" in data
assert data["email_address"].startswith("receipts+")
assert data["email_address"].endswith("@receipts.cartsnitch.com")
assert len(data["email_address"]) > len("receipts+@receipts.cartsnitch.com")
assert "instructions" in data
assert "Meijer" in data["instructions"]
assert "Kroger" in data["instructions"]
assert "Target" in data["instructions"]
@pytest.mark.asyncio
async def test_get_email_in_address_unauthenticated(client: AsyncClient):
"""Unauthenticated request returns 401."""
response = await client.get("/auth/me/email-in-address")
assert response.status_code == 401
@pytest.mark.asyncio
async def test_get_email_in_address_invalid_token(client: AsyncClient):
"""Invalid JWT token returns 401."""
response = await client.get(
"/auth/me/email-in-address",
headers={"Authorization": "Bearer invalid-token-xyz"},
)
assert response.status_code == 401
@pytest.mark.asyncio
async def test_email_address_format(client: AsyncClient, auth_headers: dict):
"""Email address format is receipts+{22-char-urlsafe-token}@receipts.cartsnitch.com."""
response = await client.get(
"/auth/me/email-in-address",
headers=auth_headers,
)
assert response.status_code == 200
data = response.json()
email = data["email_address"]
# Format: receipts+<22-char-urlsafe-token>@receipts.cartsnitch.com
assert email.startswith("receipts+")
assert email.endswith("@receipts.cartsnitch.com")
# token_urlsafe(16) produces 22 chars
middle = email[len("receipts+") : -len("@receipts.cartsnitch.com")]
assert len(middle) == 22
assert "@" not in middle
+3 -2
View File
@@ -6,13 +6,14 @@ from httpx import ASGITransport, AsyncClient
from cartsnitch_api.main import app
EXPECTED_ROUTES = [
# Auth (6)
# Auth (7)
("post", "/auth/register"),
("post", "/auth/login"),
("post", "/auth/refresh"),
("get", "/auth/me"),
("patch", "/auth/me"),
("delete", "/auth/me"),
("get", "/auth/me/email-in-address"),
# Stores (4)
("get", "/stores"),
("get", "/me/stores"),
@@ -89,4 +90,4 @@ async def test_route_count():
if method in ("get", "post", "put", "delete", "patch"):
count += 1
assert count == 33, f"Expected 33 routes, found {count}"
assert count == 34, f"Expected 34 routes, found {count}"
+548 -650
View File
File diff suppressed because it is too large Load Diff
+4 -1
View File
@@ -50,6 +50,9 @@
"overrides": {
"@rollup/pluginutils": "5.3.0",
"flatted": "^3.4.2",
"serialize-javascript": "7.0.5"
"serialize-javascript": "7.0.5",
"brace-expansion": ">=1.1.13",
"lodash": ">=4.17.24",
"minimatch": "^10.2.4"
}
}
+5
View File
@@ -14,11 +14,13 @@ dependencies = [
"cryptography>=42.0,<44.0",
"fastapi>=0.115,<1.0",
"uvicorn[standard]>=0.30,<1.0",
"beautifulsoup4>=4.12,<5.0",
"redis>=5.0,<6.0",
"pydantic>=2.0,<3.0",
"pydantic-settings>=2.0,<3.0",
"sqlalchemy[asyncio]>=2.0,<3.0",
"asyncpg>=0.29,<1.0",
"resend>=2.0",
]
[project.optional-dependencies]
@@ -27,6 +29,9 @@ dev = [
"pytest-asyncio>=0.23",
"ruff>=0.3",
"pytest-cov>=5.0",
"fakeredis[aioredis]>=2.20",
"httpx>=0.27",
"python-multipart>=0.0.9",
]
[tool.hatch.build.targets.wheel]
@@ -1,9 +1,65 @@
"""Internal API routes for triggering scrapes and checking status."""
from fastapi import APIRouter
import hashlib
import hmac
import re
import time
from fastapi import APIRouter, HTTPException, Request
from receiptwitness.config import settings
from receiptwitness.queue.email import EmailJob, enqueue_email, get_redis
router = APIRouter()
TOKEN_PATTERN = re.compile(r"receipts\+([A-Za-z0-9_-]+)@")
def verify_mailgun_signature(token: str, timestamp: str, signature: str) -> bool:
"""Verify Mailgun webhook signature."""
try:
ts = int(timestamp)
except (ValueError, TypeError):
return False
if abs(time.time() - ts) > 300: # 5 min freshness
return False
key = settings.mailgun_webhook_signing_key.encode()
hmac_digest = hmac.new(key, f"{timestamp}{token}".encode(), hashlib.sha256).hexdigest()
return hmac.compare_digest(signature, hmac_digest)
@router.post("/inbound/email")
async def receive_inbound_email(request: Request):
form = await request.form()
# 1. Verify Mailgun signature
token = str(form.get("token", ""))
timestamp = str(form.get("timestamp", ""))
signature = str(form.get("signature", ""))
if not verify_mailgun_signature(token, timestamp, signature):
raise HTTPException(status_code=406, detail="Invalid signature")
# 2. Extract account token from recipient
recipient = str(form.get("recipient", ""))
match = TOKEN_PATTERN.search(recipient)
if not match:
raise HTTPException(status_code=406, detail="Invalid recipient")
account_token = match.group(1)
# 3. Enqueue — worker resolves token -> user_id
body_html_val = form.get("body-html")
body_plain_val = form.get("body-plain")
job = EmailJob(
user_id=account_token,
sender=str(form.get("sender", "")),
recipient=recipient,
subject=str(form.get("subject", "")),
body_html=str(body_html_val) if body_html_val is not None else None,
body_plain=str(body_plain_val) if body_plain_val is not None else None,
received_at=str(form.get("timestamp", "")),
message_id=str(form.get("Message-Id", "")),
)
client = await get_redis()
await enqueue_email(client, job)
return {"status": "queued"}
@router.get("/health")
async def health():
@@ -22,5 +22,13 @@ class ReceiptWitnessSettings(BaseSettings):
headless: bool = True
browser_timeout_ms: int = 60000
# Email notifications (Resend)
resend_api_key: str = ""
notification_email_from: str = "notifications@cartsnitch.com"
notifications_enabled: bool = False
# Mailgun inbound email webhook
mailgun_webhook_signing_key: str = ""
settings = ReceiptWitnessSettings()
+46 -8
View File
@@ -2,12 +2,17 @@
import json
import logging
import uuid
from datetime import UTC, datetime
from decimal import Decimal
import redis.asyncio as aioredis
from cartsnitch_common.database import get_async_session_factory
from cartsnitch_common.models.user import User
from sqlalchemy import select
from receiptwitness.config import settings
from receiptwitness.notifications.email import send_receipt_notification
logger = logging.getLogger(__name__)
@@ -39,6 +44,36 @@ async def get_redis_client() -> aioredis.Redis:
return aioredis.Redis(connection_pool=_get_pool())
async def _send_notification_for_event(payload: dict) -> None:
"""Look up user email and send receipt notification. Silently skips on error."""
try:
user_uuid = uuid.UUID(payload["user_id"])
except (ValueError, KeyError):
logger.warning("Invalid user_id in event payload: %s", payload.get("user_id"))
return
try:
session_factory = get_async_session_factory(settings.database_url)
async with session_factory() as session:
result = await session.execute(select(User.email).where(User.id == user_uuid))
row = result.scalar_one_or_none()
if not row:
logger.warning("User %s not found for notification", user_uuid)
return
user_email = row
except Exception:
logger.exception("Failed to look up user email for notification")
return
await send_receipt_notification(
user_email=user_email,
store_name=payload["store_slug"],
item_count=payload["item_count"],
total=payload["total"],
purchase_date=payload["purchase_date"],
)
async def publish_receipt_ingested(
user_id: str,
store_slug: str,
@@ -48,18 +83,19 @@ async def publish_receipt_ingested(
total: Decimal | float,
) -> None:
"""Publish a cartsnitch.receipts.ingested event after successful ingestion."""
payload = {
"user_id": user_id,
"store_slug": store_slug,
"purchase_id": purchase_id,
"purchase_date": purchase_date,
"item_count": item_count,
"total": float(total) if isinstance(total, Decimal) else total,
}
event = {
"event_type": CHANNEL_RECEIPTS_INGESTED,
"timestamp": datetime.now(UTC).isoformat(),
"service": "receiptwitness",
"payload": {
"user_id": user_id,
"store_slug": store_slug,
"purchase_id": purchase_id,
"purchase_date": purchase_date,
"item_count": item_count,
"total": float(total) if isinstance(total, Decimal) else total,
},
"payload": payload,
}
try:
@@ -73,3 +109,5 @@ async def publish_receipt_ingested(
except aioredis.ConnectionError:
logger.error("Failed to publish event — Redis/DragonflyDB connection error")
raise
else:
await _send_notification_for_event(payload)
@@ -0,0 +1,45 @@
"""Email notifications via Resend."""
import asyncio
import html
import logging
import resend
from receiptwitness.config import settings
logger = logging.getLogger(__name__)
async def send_receipt_notification(
user_email: str,
store_name: str,
item_count: int,
total: float,
purchase_date: str,
) -> None:
"""Send receipt ingestion confirmation email via Resend."""
if not settings.notifications_enabled or not settings.resend_api_key:
logger.debug("Notifications disabled — skipping email send")
return
resend.api_key = settings.resend_api_key
store_name_safe = html.escape(store_name)
purchase_date_safe = html.escape(purchase_date)
try:
await asyncio.to_thread(
resend.Emails.send,
{
"from": settings.notification_email_from,
"to": [user_email],
"subject": f"Receipt processed: {store_name} - ${total:.2f}",
"html": (
f"<p>Your receipt from <strong>{store_name_safe}</strong> on "
f"{purchase_date_safe} has been processed.</p>"
f"<p>{item_count} items, total: ${total:.2f}</p>"
),
},
)
logger.info("Receipt notification sent to %s", user_email)
except Exception:
logger.exception("Failed to send receipt notification to %s", user_email)
@@ -0,0 +1 @@
"""Email receipt parsers for retailer email receipts."""
@@ -0,0 +1,32 @@
"""Base interface for email receipt parsers."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
@dataclass
class EmailReceipt:
"""Raw email data before parsing."""
sender: str
recipient: str
subject: str
body_html: str | None = None
body_plain: str | None = None
received_at: str | None = None
raw_headers: dict = field(default_factory=dict)
class BaseEmailParser(ABC):
"""All retailer email parsers implement this interface."""
@abstractmethod
def can_parse(self, email: EmailReceipt) -> bool:
"""Return True if this parser handles this email."""
...
@abstractmethod
def parse(self, email: EmailReceipt) -> dict:
"""Parse email into a dict matching PurchaseCreate schema fields.
Must include an items list matching PurchaseItemCreate fields."""
...
@@ -0,0 +1,25 @@
"""Detect which retailer sent a receipt email."""
import re
from receiptwitness.parsers.email.base import EmailReceipt
RETAILER_PATTERNS: dict[str, list[str]] = {
"meijer": [r"@meijer\.com$", r"@email\.meijer\.com$"],
"kroger": [r"@kroger\.com$", r"@email\.kroger\.com$"],
"target": [r"@target\.com$", r"@email\.target\.com$"],
}
def detect_retailer(email: EmailReceipt) -> str | None:
"""Return retailer slug or None if unrecognized."""
sender = email.sender.lower().strip()
# Extract email from "Name <email>" format
match = re.search(r"<([^>]+)>", sender)
if match:
sender = match.group(1)
for retailer, patterns in RETAILER_PATTERNS.items():
for pattern in patterns:
if re.search(pattern, sender):
return retailer
return None
@@ -0,0 +1,157 @@
"""Kroger email receipt parser."""
import logging
import re
from datetime import datetime
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
logger = logging.getLogger(__name__)
def _to_decimal(value: str | float | int | None, default: Decimal = Decimal("0")) -> Decimal:
"""Safely convert a value to Decimal."""
if value is None:
return default
try:
return Decimal(str(value).replace("$", "").replace(",", "").strip())
except (InvalidOperation, ValueError):
return default
def _extract_total(body: str) -> Decimal:
"""Extract the transaction total from email body."""
patterns = [
r"Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Amount[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Grand\s+Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
]
for pattern in patterns:
match = re.search(pattern, body, re.IGNORECASE)
if match:
return _to_decimal(match.group(1))
return Decimal("0")
def _extract_receipt_id(body: str) -> str | None:
"""Extract receipt ID / transaction ID from HTML body.
Strips HTML tags first so that whitespace between delimiters and values
(e.g. from ``</strong> KR-2026-0315-4829`` -> `` KR-2026-0315-4829``)
is normalized and the pattern can match cleanly.
"""
stripped = re.sub(r"<[^>]+>", "", body)
patterns = [
r"Receipt\s*#[:\s]*([A-Z0-9-]+)",
r"Transaction\s*#[:\s]*([A-Z0-9-]+)",
r"Order\s*#[:\s]*([A-Z0-9-]+)",
r"Confirmation\s*#[:\s]*([A-Z0-9-]+)",
]
for pattern in patterns:
match = re.search(pattern, stripped, re.IGNORECASE)
if match:
return match.group(1)
return None
def _extract_date(body: str) -> str:
"""Extract purchase date from email body. Returns ISO date string or empty string."""
patterns = [
r"(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})",
r"([A-Z][a-z]{2}\s+\d{1,2},?\s+\d{4})",
]
for pattern in patterns:
match = re.search(pattern, body)
if match:
raw = match.group(1)
try:
dt = datetime.strptime(raw.replace(",", ""), "%b %d %Y")
return dt.strftime("%Y-%m-%d")
except ValueError:
pass
try:
for fmt in ("%m/%d/%Y", "%m/%d/%y", "%d/%m/%Y", "%d/%m/%y"):
try:
dt = datetime.strptime(raw, fmt)
return dt.strftime("%Y-%m-%d")
except ValueError:
continue
except Exception:
pass
return ""
def _extract_items_soup(body: str) -> list[dict]:
"""Extract line items from HTML email body using BeautifulSoup."""
items = []
try:
soup = BeautifulSoup(body, "html.parser")
text = soup.get_text(separator="\n", strip=True)
# Strip HTML tags from raw body to normalize whitespace
stripped = re.sub(r"<[^>]+>", " ", body)
stripped = re.sub(r"\s+", " ", stripped)
skip_prefixes = (
"Subtotal",
"Tax",
"Total",
"Kroger",
"Target",
"Date",
"Receipt",
"Order",
"Transaction",
"Confirmation",
"Thank",
"Questions",
"Keep",
"Receipt",
)
for line in text.split("\n"):
line = line.strip()
if not line or line.startswith(skip_prefixes):
continue
# Match lines like "Product Name $9.99"
match = re.match(r"(.+?)\s+\$([0-9]+\.[0-9]{2})\s*$", line)
if match:
name = match.group(1).strip()
price = _to_decimal(match.group(2))
if len(name) > 2 and price > 0:
items.append(
{
"product_name_raw": name,
"quantity": Decimal("1"),
"unit_price": price,
"extended_price": price,
}
)
except Exception:
pass
return items[:20]
class KrogerEmailParser(BaseEmailParser):
"""Parse Kroger email receipts (digital receipts via kroger.com)."""
KROGER_KEYWORDS = ("kroger", "kroger.com", "plus")
def can_parse(self, email: EmailReceipt) -> bool:
sender = (email.sender or "").lower()
body = (email.body_html or email.body_plain or "").lower()
return any(kw in sender or kw in body for kw in self.KROGER_KEYWORDS)
def parse(self, email: EmailReceipt) -> dict:
body = (email.body_html or email.body_plain or "").strip()
total = _extract_total(body)
receipt_id = _extract_receipt_id(body) or ""
purchase_date = _extract_date(body)
items = _extract_items_soup(body)
return {
"receipt_id": receipt_id,
"purchase_date": purchase_date,
"total": total,
"items": items,
}
@@ -0,0 +1,259 @@
"""Parse Meijer digital receipt emails into structured purchase data."""
import re
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from bs4.element import Tag
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
def _to_decimal(value, default: str = "0") -> Decimal:
"""Safely convert a value to Decimal."""
if value is None:
return Decimal(default)
try:
return Decimal(str(value).replace("$", "").replace(",", "").strip())
except (InvalidOperation, ValueError, TypeError):
return Decimal(default)
def _extract_receipt_id(soup: BeautifulSoup, subject: str | None) -> str | None:
"""Extract receipt/transaction ID from subject or body."""
if subject:
match = re.search(r"TXN[-\s]\d{4}[-\s]\d{4}[-\s]\d+", subject)
if match:
return match.group(0).replace(" ", "-")
# Fallback: look in body
text = soup.get_text()
match = re.search(r"TXN[-\s]\d{4}[-\s]\d{4}[-\s]\d+", text)
if match:
return match.group(0).replace(" ", "-")
return None
def _extract_purchase_date(soup: BeautifulSoup, subject: str | None) -> str | None:
"""Extract purchase date from subject or body."""
text = soup.get_text()
# Try ISO format first: YYYY-MM-DD
match = re.search(r"(\d{4})-(\d{2})-(\d{2})", text)
if match:
return f"{match.group(1)}-{match.group(2)}-{match.group(3)}"
# Try written format: March 15, 2026
match = re.search(r"([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4})", text)
if match:
month_str = match.group(1).lower()
day = match.group(2)
year = match.group(3)
month_map = {
"january": "01",
"february": "02",
"march": "03",
"april": "04",
"may": "05",
"june": "06",
"july": "07",
"august": "08",
"september": "09",
"october": "10",
"november": "11",
"december": "12",
}
month = month_map.get(month_str)
if month:
return f"{year}-{month}-{day.zfill(2)}"
# MM/DD/YYYY
match = re.search(r"(\d{1,2})/(\d{1,2})/(\d{4})", text)
if match:
return f"{match.group(3)}-{match.group(1).zfill(2)}-{match.group(2).zfill(2)}"
return None
def _extract_store_info(soup: BeautifulSoup) -> dict:
"""Extract store name and number from the email body."""
store_info: dict = {}
# Look for store number in header
store_num_match = re.search(r"Meijer\s+Store\s+#?(\d+)", soup.get_text(), re.IGNORECASE)
if store_num_match:
store_info["store_number"] = store_num_match.group(1)
return store_info
def _extract_items(table: Tag | None) -> list[dict]:
"""Extract line items from the items table."""
items: list[dict] = []
if not table:
return items
rows = table.find_all("tr")
for row in rows:
cells = row.find_all("td")
if len(cells) < 3:
continue
name_cell = cells[0].get_text(strip=True)
qty_cell = cells[1].get_text(strip=True)
price_cell = cells[2].get_text(strip=True)
if not name_cell or name_cell.lower() in ("item", "description"):
continue
# Skip subtotal/tax/total/savings rows
if any(
label in name_cell.lower()
for label in ("subtotal", "tax", "total", "savings", "grand total")
):
continue
try:
quantity = Decimal(qty_cell)
except (InvalidOperation, ValueError, TypeError):
quantity = Decimal("1")
price_str = price_cell.replace("$", "").replace(",", "").strip()
try:
unit_price = Decimal(price_str)
except (InvalidOperation, ValueError, TypeError):
unit_price = Decimal("0")
extended_price = unit_price # Default to unit price; no qty column in fixture
items.append(
{
"product_name_raw": name_cell,
"quantity": quantity,
"unit_price": unit_price,
"extended_price": extended_price,
}
)
return items
def _extract_totals_plain(text: str) -> dict:
"""Extract totals from plain text (no HTML)."""
totals: dict = {
"subtotal": None,
"tax": None,
"total": None,
"savings_total": None,
}
match = re.search(r"\bSubtotal\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if match:
totals["subtotal"] = _to_decimal(match.group(1))
match = re.search(r"\bTax\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if match:
totals["tax"] = _to_decimal(match.group(1))
grand_total_match = re.search(r"Grand\s+Total\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if grand_total_match:
totals["total"] = _to_decimal(grand_total_match.group(1))
savings_match = re.search(r"\bSavings\b[:\s$\-]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if savings_match:
totals["savings_total"] = _to_decimal(savings_match.group(1))
if totals["total"] is None:
total_match = re.search(r"\bTotal\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if total_match:
totals["total"] = _to_decimal(total_match.group(1))
return totals
def _extract_totals(soup: BeautifulSoup) -> dict:
"""Extract subtotal, tax, total, and savings from the totals section."""
text = soup.get_text()
totals: dict = {
"subtotal": None,
"tax": None,
"total": None,
"savings_total": None,
}
# Subtotal — use word boundary to avoid matching "Subtotal" with "Total"
match = re.search(r"\bSubtotal\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if match:
totals["subtotal"] = _to_decimal(match.group(1))
# Tax
match = re.search(r"\bTax\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if match:
totals["tax"] = _to_decimal(match.group(1))
# Grand Total (before plain "Total" to avoid matching "Subtotal")
grand_total_match = re.search(r"Grand\s+Total\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if grand_total_match:
totals["total"] = _to_decimal(grand_total_match.group(1))
# Savings — allow any combination of whitespace/$- around the number
savings_match = re.search(r"\bSavings\b[:\s$\-]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if savings_match:
totals["savings_total"] = _to_decimal(savings_match.group(1))
# Plain "Total" only if Grand Total wasn't found
if totals["total"] is None:
total_match = re.search(r"\bTotal\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if total_match:
totals["total"] = _to_decimal(total_match.group(1))
return totals
class MeijerEmailParser(BaseEmailParser):
"""Parse Meijer digital receipt emails forwarded by users."""
def can_parse(self, email: EmailReceipt) -> bool:
sender = email.sender.lower().strip()
# Extract email from "Name <email>" format
match = re.search(r"<([^>]+)>", sender)
if match:
sender = match.group(1)
return "meijer" in sender
def parse(self, email: EmailReceipt) -> dict:
body_html = email.body_html
body_plain = email.body_plain or ""
body = body_html or body_plain
soup = BeautifulSoup(body, "html.parser")
receipt_id = _extract_receipt_id(soup, email.subject)
purchase_date = _extract_purchase_date(soup, email.subject)
_ = _extract_store_info(soup)
# Find the items table — look for one with Item/Qty/Price headers
table = None
for tbl in soup.find_all("table"):
headers = tbl.find_all("th")
header_texts = [h.get_text(strip=True).lower() for h in headers]
if any("item" in h or "qty" in h or "price" in h for h in header_texts):
table = tbl
break
items = _extract_items(table)
# Extract totals from HTML; fall back to plain text if no HTML
if body_html:
totals = _extract_totals(soup)
else:
totals = _extract_totals_plain(body_plain)
return {
"receipt_id": receipt_id or "",
"purchase_date": purchase_date or "",
"total": totals["total"] or Decimal("0"),
"subtotal": totals["subtotal"],
"tax": totals["tax"],
"savings_total": totals["savings_total"],
"items": items,
}
@@ -0,0 +1,156 @@
"""Target email receipt parser."""
import logging
import re
from datetime import datetime
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
logger = logging.getLogger(__name__)
def _to_decimal(value: str | float | int | None, default: Decimal = Decimal("0")) -> Decimal:
"""Safely convert a value to Decimal."""
if value is None:
return default
try:
return Decimal(str(value).replace("$", "").replace(",", "").strip())
except (InvalidOperation, ValueError):
return default
def _extract_total(body: str) -> Decimal:
"""Extract the transaction total from email body."""
patterns = [
r"Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Amount[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Grand\s+Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
]
for pattern in patterns:
match = re.search(pattern, body, re.IGNORECASE)
if match:
return _to_decimal(match.group(1))
return Decimal("0")
def _extract_receipt_id(body: str) -> str | None:
"""Extract receipt ID / transaction ID from HTML body.
Strips HTML tags first so that whitespace between delimiters and values
(e.g. from ``</strong> TGT-2026-0318-9124`` -> `` TGT-2026-0318-9124``)
is normalized and the pattern can match cleanly.
"""
stripped = re.sub(r"<[^>]+>", "", body)
patterns = [
r"Receipt\s*#[:\s]*([A-Z0-9-]+)",
r"Order\s*#[:\s]*([A-Z0-9-]+)",
r"Confirmation\s*#[:\s]*([A-Z0-9-]+)",
r"Target\s+Order\s*#[:\s]*([A-Z0-9-]+)",
]
for pattern in patterns:
match = re.search(pattern, stripped, re.IGNORECASE)
if match:
return match.group(1)
return None
def _extract_date(body: str) -> str:
"""Extract purchase date from email body. Returns ISO date string or empty string."""
patterns = [
r"(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})",
r"([A-Z][a-z]{2}\s+\d{1,2},?\s+\d{4})",
]
for pattern in patterns:
match = re.search(pattern, body)
if match:
raw = match.group(1)
try:
dt = datetime.strptime(raw.replace(",", ""), "%b %d %Y")
return dt.strftime("%Y-%m-%d")
except ValueError:
pass
try:
for fmt in ("%m/%d/%Y", "%m/%d/%y", "%d/%m/%Y", "%d/%m/%y"):
try:
dt = datetime.strptime(raw, fmt)
return dt.strftime("%Y-%m-%d")
except ValueError:
continue
except Exception:
pass
return ""
def _extract_items_soup(body: str) -> list[dict]:
"""Extract line items from HTML email body using BeautifulSoup."""
items = []
try:
soup = BeautifulSoup(body, "html.parser")
text = soup.get_text(separator="\n", strip=True)
for line in text.split("\n"):
line = line.strip()
if not line or line.startswith(
(
"Subtotal",
"Tax",
"Total",
"Target",
"Kroger",
"Date",
"Receipt",
"Order",
"Transaction",
"Confirmation",
"Thank",
"Questions",
"Keep",
"Receipt",
"Store",
)
):
continue
# Match lines like "Product Name $9.99"
match = re.match(r"(.+?)\s+\$([0-9]+\.[0-9]{2})\s*$", line)
if match:
name = match.group(1).strip()
price = _to_decimal(match.group(2))
if len(name) > 2 and price > 0:
items.append(
{
"product_name_raw": name,
"quantity": Decimal("1"),
"unit_price": price,
"extended_price": price,
}
)
except Exception:
pass
return items[:20]
class TargetEmailParser(BaseEmailParser):
"""Parse Target email receipts (Circle order confirmations)."""
TARGET_KEYWORDS = ("target.com", "targetnow", "circle", "target")
def can_parse(self, email: EmailReceipt) -> bool:
sender = (email.sender or "").lower()
body = (email.body_html or email.body_plain or "").lower()
return any(kw in sender or kw in body for kw in self.TARGET_KEYWORDS)
def parse(self, email: EmailReceipt) -> dict:
body = (email.body_html or email.body_plain or "").strip()
total = _extract_total(body)
receipt_id = _extract_receipt_id(body) or ""
purchase_date = _extract_date(body)
items = _extract_items_soup(body)
return {
"receipt_id": receipt_id,
"purchase_date": purchase_date,
"total": total,
"items": items,
}
@@ -0,0 +1 @@
"""DragonflyDB Streams queue for email receipt processing."""
@@ -0,0 +1,77 @@
"""DragonflyDB Streams queue for email receipt processing."""
from __future__ import annotations
import json
import logging
from dataclasses import asdict, dataclass
from typing import cast
import redis.asyncio as aioredis
from receiptwitness.config import settings
logger = logging.getLogger(__name__)
STREAM_KEY = "email:receipts"
CONSUMER_GROUP = "email-workers"
@dataclass
class EmailJob:
"""Payload for an email receipt processing job."""
user_id: str
sender: str
recipient: str
subject: str
body_html: str | None
body_plain: str | None
received_at: str
message_id: str # from email provider, for dedup
async def get_redis() -> aioredis.Redis:
"""Get async Redis/DragonflyDB client."""
return cast(aioredis.Redis, aioredis.from_url(settings.redis_url, decode_responses=True))
async def ensure_consumer_group(client: aioredis.Redis) -> None:
"""Create consumer group if it does not exist."""
try:
await client.xgroup_create(STREAM_KEY, CONSUMER_GROUP, id="0", mkstream=True)
except aioredis.ResponseError as e:
if "BUSYGROUP" not in str(e):
raise
async def enqueue_email(client: aioredis.Redis, job: EmailJob) -> str:
"""Add email job to the stream. Returns the stream message ID."""
payload: dict[str, str | bytes | int | float] = {"data": json.dumps(asdict(job))}
msg_id: str = cast(str, await client.xadd(STREAM_KEY, payload)) # type: ignore[arg-type] # redis-py StreamCommands.xadd expects broader FieldT union; runtime behavior is correct
logger.info("Enqueued email job %s for user %s", msg_id, job.user_id)
return msg_id
async def consume_emails(
client: aioredis.Redis,
consumer_name: str,
count: int = 1,
block_ms: int = 5000,
) -> list[tuple[str, EmailJob]]:
"""Read pending messages from the stream. Returns list of (msg_id, EmailJob)."""
await ensure_consumer_group(client)
messages = await client.xreadgroup(
CONSUMER_GROUP, consumer_name, {STREAM_KEY: ">"}, count=count, block=block_ms
)
results = []
for _stream, entries in messages:
for msg_id, fields in entries:
job = EmailJob(**json.loads(fields["data"]))
results.append((msg_id, job))
return results
async def ack_email(client: aioredis.Redis, msg_id: str) -> None:
"""Acknowledge a processed message."""
await client.xack(STREAM_KEY, CONSUMER_GROUP, msg_id)
@@ -0,0 +1 @@
"""Async email receipt worker consuming from DragonflyDB Streams."""
@@ -0,0 +1,104 @@
"""Async worker that consumes email receipt jobs from DragonflyDB Streams."""
import asyncio
import logging
from cartsnitch_common.database import get_async_session_factory
from cartsnitch_common.models.user import User
from sqlalchemy import select
from receiptwitness.config import settings
from receiptwitness.events import publish_receipt_ingested
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
from receiptwitness.parsers.email.detector import detect_retailer
from receiptwitness.parsers.email.kroger import KrogerEmailParser
from receiptwitness.parsers.email.meijer import MeijerEmailParser
from receiptwitness.parsers.email.target import TargetEmailParser
from receiptwitness.queue.email import ack_email, consume_emails, get_redis
logger = logging.getLogger(__name__)
CONSUMER_NAME = "worker-1"
# Registry of available email parsers
PARSERS: dict[str, BaseEmailParser] = {
"meijer": MeijerEmailParser(),
"kroger": KrogerEmailParser(),
"target": TargetEmailParser(),
}
async def resolve_user(token: str) -> str | None:
"""Look up user_id from email_inbound_token."""
session_factory = get_async_session_factory(settings.database_url)
async with session_factory() as session:
result = await session.execute(select(User.id).where(User.email_inbound_token == token))
row = result.scalar_one_or_none()
return str(row) if row else None
async def process_job(msg_id: str, job) -> bool:
"""Process a single email job. Returns True on success."""
# 1. Resolve user from token
user_id = await resolve_user(job.user_id) # user_id field holds token
if not user_id:
logger.warning("Unknown token %s, dropping message %s", job.user_id, msg_id)
return True # ack to avoid infinite retry
# 2. Build EmailReceipt
email = EmailReceipt(
sender=job.sender,
recipient=job.recipient,
subject=job.subject,
body_html=job.body_html,
body_plain=job.body_plain,
received_at=job.received_at,
)
# 3. Detect retailer
retailer = detect_retailer(email)
if not retailer or retailer not in PARSERS:
logger.warning(
"Unrecognized retailer from %s, archiving msg %s",
job.sender,
msg_id,
)
return True # ack — no parser available
# 4. Parse
parser = PARSERS[retailer]
parsed = parser.parse(email)
# 5. Publish event
await publish_receipt_ingested(
user_id=user_id,
store_slug=retailer,
purchase_id=parsed.get("receipt_id", msg_id),
purchase_date=parsed.get("purchase_date", ""),
item_count=len(parsed.get("items", [])),
total=parsed.get("total", 0),
)
return True
async def run_worker() -> None:
"""Main worker loop — consume and process email jobs."""
client = await get_redis()
logger.info("Email worker started, consuming from email:receipts")
while True:
try:
jobs = await consume_emails(client, CONSUMER_NAME, count=5, block_ms=5000)
for msg_id, job in jobs:
try:
success = await process_job(msg_id, job)
if success:
await ack_email(client, msg_id)
except Exception:
logger.exception("Failed to process email job %s", msg_id)
except Exception:
logger.exception("Worker loop error, retrying in 5s")
await asyncio.sleep(5)
if __name__ == "__main__":
asyncio.run(run_worker())
+45
View File
@@ -0,0 +1,45 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Kroger Digital Receipt</title>
</head>
<body style="font-family: Arial, sans-serif; max-width: 600px; margin: 0 auto; padding: 20px; color: #333;">
<div style="background-color: #0057a8; color: white; padding: 20px; text-align: center;">
<img src="https://www.kroger.com/email-logo.png" alt="Kroger" style="height: 40px;">
<h1 style="margin: 10px 0; font-size: 24px;">Your Digital Receipt</h1>
<p style="margin: 0;">Kroger Plus Member</p>
</div>
<div style="padding: 20px; background-color: #f5f5f5;">
<h2 style="color: #0057a8; margin-top: 0;">Kroger #882 - Downtown</h2>
<p style="margin: 5px 0;">123 Main Street<br>Anytown, OH 45202</p>
<p style="margin: 5px 0;"><strong>Date:</strong> 03/15/2026</p>
<p style="margin: 5px 0;"><strong>Receipt #:</strong> KR-2026-0315-4829</p>
<p style="margin: 5px 0;"><strong>Transaction #:</strong> TXN-789123456</p>
</div>
<div style="padding: 20px;">
<h3>Items Purchased</h3>
<p>Whole Milk 1 Gallon $3.99</p>
<p>Sourdough Bread $4.49</p>
<p>Free Range Eggs 12ct $5.99</p>
<p>Baby Spinach 5oz $4.29</p>
</div>
<div style="padding: 20px; background-color: #e8f4e8; border-left: 4px solid #0057a8;">
<p style="margin: 5px 0;"><strong>Subtotal:</strong> $18.76</p>
<p style="margin: 5px 0;"><strong>Tax:</strong> $1.24</p>
<p style="margin: 5px 0; color: #0057a8; font-weight: bold; font-size: 18px;">Total: $20.00</p>
</div>
<div style="padding: 15px; margin-top: 15px; background-color: #fff8e1; border-left: 4px solid #ffc107;">
<p style="margin: 0; font-size: 14px; color: #666;">Kroger Plus Savings: <strong>$3.25</strong> saved on this order.</p>
</div>
<div style="padding: 20px; text-align: center; color: #999; font-size: 12px; margin-top: 20px;">
<p>Thank you for shopping at Kroger!</p>
<p>Keep your receipt for returns within 90 days.</p>
</div>
</body>
</html>
+127
View File
@@ -0,0 +1,127 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Meijer Digital Receipt</title>
<style>
body { font-family: Arial, sans-serif; background: #f4f4f4; margin: 0; padding: 20px; }
.receipt-container { background: #ffffff; max-width: 600px; margin: 0 auto; padding: 30px; border: 1px solid #dddddd; }
.header { background: #003399; color: #ffffff; padding: 20px; text-align: center; margin: -30px -30px 20px -30px; }
.header h1 { margin: 0; font-size: 24px; }
.store-info { text-align: center; margin-bottom: 20px; border-bottom: 2px dashed #cccccc; padding-bottom: 15px; }
.store-info h2 { margin: 0; font-size: 18px; color: #003399; }
.receipt-meta { display: flex; justify-content: space-between; font-size: 14px; color: #555555; margin-bottom: 20px; }
table { width: 100%; border-collapse: collapse; margin-bottom: 20px; }
th { background: #f0f0f0; text-align: left; padding: 8px 10px; font-size: 13px; color: #333333; }
td { padding: 8px 10px; border-bottom: 1px solid #eeeeee; font-size: 14px; }
.item-name { font-weight: bold; }
.totals { margin-left: auto; width: 250px; }
.totals-row { display: flex; justify-content: space-between; padding: 6px 0; font-size: 14px; }
.totals-row.grand-total { font-weight: bold; font-size: 16px; border-top: 2px solid #333333; padding-top: 10px; margin-top: 4px; }
.savings { color: #cc0000; }
.footer { text-align: center; font-size: 12px; color: #888888; margin-top: 20px; padding-top: 15px; border-top: 1px solid #dddddd; }
</style>
</head>
<body>
<div class="receipt-container">
<div class="header">
<h1>MEIJER</h1>
<p style="margin: 5px 0 0; font-size: 14px;">Digital Receipt</p>
</div>
<div class="store-info">
<h2>Meijer Store #42</h2>
<p style="margin: 5px 0 0; font-size: 13px; color: #666;">1555 Lake Drive SE, Grand Rapids, MI 49506</p>
</div>
<div class="receipt-meta">
<div>
<strong>Date:</strong> March 15, 2026<br />
<strong>Time:</strong> 2:34 PM
</div>
<div style="text-align: right;">
<strong>Transaction #</strong><br />
TXN-2026-0315-0042
</div>
</div>
<table>
<thead>
<tr>
<th>Item</th>
<th style="text-align: center;">Qty</th>
<th style="text-align: right;">Price</th>
</tr>
</thead>
<tbody>
<tr>
<td class="item-name">ORGANIC BANANAS</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$0.69</td>
</tr>
<tr>
<td class="item-name">WHOLE MILK 1 GAL</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$4.29</td>
</tr>
<tr>
<td class="item-name">MEIJER WHOLE GRAIN OAT CEREAL 18OZ</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$4.99</td>
</tr>
<tr>
<td class="item-name">FRESH BROCCOLI CROWN</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$2.49</td>
</tr>
<tr>
<td class="item-name">GROUND BEEF 85/15 1LB</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$6.99</td>
</tr>
<tr>
<td class="item-name">SOURDOUGH BREAD</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$3.99</td>
</tr>
<tr>
<td class="item-name">MEIJER BABY SPINACH 5OZ</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$4.49</td>
</tr>
<tr>
<td class="item-name">LARGE EGGS DOZEN</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$3.29</td>
</tr>
</tbody>
</table>
<div class="totals">
<div class="totals-row">
<span>Subtotal</span>
<span>$31.22</span>
</div>
<div class="totals-row">
<span>Tax</span>
<span>$2.19</span>
</div>
<div class="totals-row savings">
<span>Total Savings</span>
<span>-$3.40</span>
</div>
<div class="totals-row grand-total">
<span>Total</span>
<span>$33.41</span>
</div>
</div>
<div class="footer">
<p>Thank you for shopping at Meijer!</p>
<p>Keep your receipt for your records.<br />
Questions? Call 1-800-927-8699 or visit meijer.com</p>
</div>
</div>
</body>
</html>
+44
View File
@@ -0,0 +1,44 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Target Order Confirmation</title>
</head>
<body style="font-family: Arial, sans-serif; max-width: 600px; margin: 0 auto; padding: 20px; color: #333;">
<div style="background-color: #cc0000; color: white; padding: 20px; text-align: center;">
<img src="https://assets.target.com/email-logo.png" alt="Target" style="height: 40px;">
<h1 style="margin: 10px 0; font-size: 24px;">Order Confirmation</h1>
<p style="margin: 0;">Thanks for shopping Target Circle!</p>
</div>
<div style="padding: 20px; background-color: #f5f5f5;">
<h2 style="color: #cc0000; margin-top: 0;">Target Store #1247 - Riverside</h2>
<p style="margin: 5px 0;">4500 River Road<br>Columbus, OH 43220</p>
<p style="margin: 5px 0;"><strong>Date:</strong> 03/18/2026</p>
<p style="margin: 5px 0;"><strong>Order #:</strong> TGT-2026-0318-9124</p>
<p style="margin: 5px 0;"><strong>Confirmation #:</strong> CNF-44772819</p>
</div>
<div style="padding: 20px;">
<h3>Items Purchased</h3>
<p>Good & Gather Whole Milk 1 Gal $3.89</p>
<p>Arborio Rice 2lb bag $6.49</p>
<p>Parmesan Wedge 8oz $7.99</p>
</div>
<div style="padding: 20px; background-color: #fff8e1; border-left: 4px solid #cc0000;">
<p style="margin: 5px 0;"><strong>Subtotal:</strong> $18.37</p>
<p style="margin: 5px 0;"><strong>Tax:</strong> $1.45</p>
<p style="margin: 5px 0; color: #cc0000; font-weight: bold; font-size: 18px;">Total: $19.82</p>
</div>
<div style="padding: 15px; margin-top: 15px; background-color: #e8f4e8; border-left: 4px solid #4caf50;">
<p style="margin: 0; font-size: 14px; color: #333;">Target Circle offer saved you <strong>$0.30</strong> on this order.</p>
</div>
<div style="padding: 20px; text-align: center; color: #999; font-size: 12px; margin-top: 20px;">
<p>Questions? Call Target Guest Services at 1-800-591-3869.</p>
<p>Receipt valid for returns within 30 days.</p>
</div>
</body>
</html>
@@ -0,0 +1 @@
"""Tests for the ReceiptWitness API routes."""
@@ -0,0 +1,125 @@
"""Tests for the /inbound/email webhook endpoint."""
import hashlib
import hmac
import time
from unittest.mock import AsyncMock, patch
import pytest
from fastapi.testclient import TestClient
from receiptwitness.main import app
@pytest.fixture
def client():
return TestClient(app)
@pytest.fixture
def mock_redis():
redis_mock = AsyncMock()
with patch("receiptwitness.api.routes.get_redis", return_value=redis_mock):
enqueue_patcher = patch("receiptwitness.api.routes.enqueue_email", new_callable=AsyncMock)
with enqueue_patcher as mock_enqueue:
yield {"redis": redis_mock, "enqueue": mock_enqueue}
def make_signature(signing_key: str, token: str, timestamp: str) -> str:
return hmac.new(
signing_key.encode(),
f"{timestamp}{token}".encode(),
hashlib.sha256,
).hexdigest()
def valid_form(signing_key: str = "test-secret"):
ts = str(int(time.time()))
token = "test-token"
sig = make_signature(signing_key, token, ts)
return {
"token": token,
"timestamp": ts,
"signature": sig,
"sender": "sender@example.com",
"recipient": "receipts+user123@example.com",
"subject": "Your Meijer Receipt",
"body-html": "<p>Thank you for shopping at Meijer</p>",
"body-plain": "Thank you for shopping at Meijer",
"Message-Id": "<msg-001@example.com>",
}
def test_valid_webhook(client, mock_redis):
with patch("receiptwitness.api.routes.settings") as mock_settings:
mock_settings.mailgun_webhook_signing_key = "test-secret"
response = client.post("/inbound/email", data=valid_form())
assert response.status_code == 200
assert response.json() == {"status": "queued"}
mock_redis["enqueue"].assert_awaited_once()
def test_invalid_signature(client, mock_redis):
with patch("receiptwitness.api.routes.settings") as mock_settings:
mock_settings.mailgun_webhook_signing_key = "test-secret"
form = valid_form()
form["signature"] = "wrong-signature"
response = client.post("/inbound/email", data=form)
assert response.status_code == 406
assert response.json()["detail"] == "Invalid signature"
mock_redis["enqueue"].assert_not_awaited()
def test_invalid_recipient_no_plus(client, mock_redis):
with patch("receiptwitness.api.routes.settings") as mock_settings:
mock_settings.mailgun_webhook_signing_key = "test-secret"
form = valid_form()
form["recipient"] = "receipts@example.com" # no plus-address
response = client.post("/inbound/email", data=form)
assert response.status_code == 406
assert response.json()["detail"] == "Invalid recipient"
mock_redis["enqueue"].assert_not_awaited()
def test_stale_timestamp(client, mock_redis):
with patch("receiptwitness.api.routes.settings") as mock_settings:
mock_settings.mailgun_webhook_signing_key = "test-secret"
ts = str(int(time.time()) - 600) # 10 min old
token = "test-token"
sig = make_signature("test-secret", token, ts)
form = {
"token": token,
"timestamp": ts,
"signature": sig,
"sender": "sender@example.com",
"recipient": "receipts+user123@example.com",
"subject": "Receipt",
}
response = client.post("/inbound/email", data=form)
assert response.status_code == 406
assert response.json()["detail"] == "Invalid signature"
mock_redis["enqueue"].assert_not_awaited()
def test_invalid_timestamp_returns_406(client, mock_redis):
"""Empty timestamp should return 406, not 500."""
with patch("receiptwitness.api.routes.settings") as mock_settings:
mock_settings.mailgun_webhook_signing_key = "test-secret"
form = {
"token": "test-token",
"timestamp": "",
"signature": "any-sig",
"sender": "sender@example.com",
"recipient": "receipts+user123@example.com",
"subject": "Receipt",
}
response = client.post("/inbound/email", data=form)
assert response.status_code == 406
assert response.json()["detail"] == "Invalid signature"
mock_redis["enqueue"].assert_not_awaited()
def test_get_inbound_email_returns_405(client):
"""GET /inbound/email is not allowed."""
response = client.get("/inbound/email")
assert response.status_code == 405
@@ -0,0 +1,84 @@
"""Tests for email notifications."""
from unittest.mock import patch
import pytest
class TestSendReceiptNotification:
@pytest.fixture
def mock_resend(self):
with patch("receiptwitness.notifications.email.resend") as mock:
yield mock
@pytest.mark.asyncio
async def test_sends_email_with_correct_params(self, mock_resend):
from receiptwitness.notifications.email import send_receipt_notification
with (
patch("receiptwitness.notifications.email.settings") as mock_settings,
patch(
"receiptwitness.notifications.email.asyncio.to_thread",
new=lambda fn, *args, **kwargs: fn(*args, **kwargs),
),
):
mock_settings.notifications_enabled = True
mock_settings.resend_api_key = "re_testkey_123"
mock_settings.notification_email_from = "noreply@test.com"
await send_receipt_notification(
user_email="user@example.com",
store_name="Meijer",
item_count=5,
total=42.99,
purchase_date="2026-03-28",
)
mock_resend.Emails.send.assert_called_once_with(
{
"from": "noreply@test.com",
"to": ["user@example.com"],
"subject": "Receipt processed: Meijer - $42.99",
"html": (
"<p>Your receipt from <strong>Meijer</strong> on "
"2026-03-28 has been processed.</p>"
"<p>5 items, total: $42.99</p>"
),
}
)
@pytest.mark.asyncio
async def test_skips_when_disabled(self, mock_resend):
from receiptwitness.notifications.email import send_receipt_notification
with patch("receiptwitness.notifications.email.settings") as mock_settings:
mock_settings.notifications_enabled = False
mock_settings.resend_api_key = "re_testkey_123"
await send_receipt_notification(
user_email="user@example.com",
store_name="Meijer",
item_count=5,
total=42.99,
purchase_date="2026-03-28",
)
mock_resend.Emails.send.assert_not_called()
@pytest.mark.asyncio
async def test_skips_when_api_key_empty(self, mock_resend):
from receiptwitness.notifications.email import send_receipt_notification
with patch("receiptwitness.notifications.email.settings") as mock_settings:
mock_settings.notifications_enabled = True
mock_settings.resend_api_key = ""
await send_receipt_notification(
user_email="user@example.com",
store_name="Meijer",
item_count=5,
total=42.99,
purchase_date="2026-03-28",
)
mock_resend.Emails.send.assert_not_called()
@@ -0,0 +1,49 @@
"""Tests for retailer detector."""
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.detector import detect_retailer
def test_detect_meijer():
email = EmailReceipt(
sender="receipts@meijer.com",
recipient="user@example.com",
subject="Your Receipt",
)
assert detect_retailer(email) == "meijer"
def test_detect_kroger():
email = EmailReceipt(
sender="noreply@email.kroger.com",
recipient="user@example.com",
subject="Your Receipt",
)
assert detect_retailer(email) == "kroger"
def test_detect_target():
email = EmailReceipt(
sender="Target <receipts@target.com>",
recipient="user@example.com",
subject="Your Receipt",
)
assert detect_retailer(email) == "target"
def test_detect_unknown():
email = EmailReceipt(
sender="noreply@walmart.com",
recipient="user@example.com",
subject="Your Receipt",
)
assert detect_retailer(email) is None
def test_detect_case_insensitive():
email = EmailReceipt(
sender="Receipts@MEIJER.COM",
recipient="user@example.com",
subject="Your Receipt",
)
assert detect_retailer(email) == "meijer"
@@ -0,0 +1,93 @@
"""Tests for KrogerEmailParser."""
from pathlib import Path
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.kroger import KrogerEmailParser
FIXTURE_PATH = Path(__file__).parent.parent.parent / "fixtures" / "kroger_email_receipt.html"
class TestKrogerEmailParser:
"""Tests for KrogerEmailParser."""
def setup_method(self) -> None:
self.parser = KrogerEmailParser()
self.fixture_html = FIXTURE_PATH.read_text()
def test_can_parse_kroger_sender(self) -> None:
email = EmailReceipt(
sender="noreply@email.kroger.com",
recipient="user@example.com",
subject="Your Kroger Receipt",
body_html=self.fixture_html,
)
assert self.parser.can_parse(email) is True
def test_can_parse_kroger_in_body(self) -> None:
email = EmailReceipt(
sender="someone@unknown.com",
recipient="user@example.com",
subject="Your Receipt",
body_html="<html><body>Kroger digital receipt</body></html>",
)
assert self.parser.can_parse(email) is True
def test_cannot_parse_unrelated(self) -> None:
email = EmailReceipt(
sender="noreply@walmart.com",
recipient="user@example.com",
subject="Your Receipt",
body_html="<html><body>Walmart receipt</body></html>",
)
assert self.parser.can_parse(email) is False
def test_parse_items(self) -> None:
email = EmailReceipt(
sender="noreply@kroger.com",
recipient="user@example.com",
subject="Your Kroger Receipt",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
items = result.get("items", [])
assert len(items) >= 3
product_names = [item["product_name_raw"] for item in items]
assert any("Whole Milk" in name for name in product_names)
assert any("Sourdough" in name for name in product_names)
for item in items:
assert "unit_price" in item
assert "extended_price" in item
def test_parse_totals(self) -> None:
email = EmailReceipt(
sender="noreply@kroger.com",
recipient="user@example.com",
subject="Your Kroger Receipt",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
total = result.get("total", 0)
assert total > 0
def test_parse_receipt_id(self) -> None:
email = EmailReceipt(
sender="noreply@kroger.com",
recipient="user@example.com",
subject="Your Kroger Receipt",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
receipt_id = result.get("receipt_id", "")
assert "KR-2026" in receipt_id or "TXN" in receipt_id
def test_parse_date(self) -> None:
email = EmailReceipt(
sender="noreply@kroger.com",
recipient="user@example.com",
subject="Your Kroger Receipt",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
purchase_date = result.get("purchase_date", "")
assert purchase_date == "2026-03-15"
@@ -0,0 +1,182 @@
"""Tests for the Meijer email receipt parser."""
import os
from decimal import Decimal
import pytest
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.meijer import MeijerEmailParser
FIXTURE_PATH = os.path.join(
os.path.dirname(__file__), "..", "..", "fixtures", "meijer_email_receipt.html"
)
def load_fixture() -> str:
with open(FIXTURE_PATH) as f:
return f.read()
@pytest.fixture
def meijer_email() -> EmailReceipt:
html = load_fixture()
return EmailReceipt(
sender="Meijer Receipts <receipts@email.meijer.com>",
recipient="shopper@example.com",
subject="Your Meijer Receipt — Transaction #TXN-2026-0315-0042",
body_html=html,
body_plain=None,
received_at="2026-03-15T14:34:00Z",
)
@pytest.fixture
def kroger_email() -> EmailReceipt:
return EmailReceipt(
sender="Kroger <noreply@email.kroger.com>",
recipient="shopper@example.com",
subject="Your Kroger Receipt",
body_html="<html><body>Kroger receipt</body></html>",
)
class TestCanParse:
def test_can_parse_meijer(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
assert parser.can_parse(meijer_email) is True
def test_cannot_parse_kroger(self, kroger_email: EmailReceipt):
parser = MeijerEmailParser()
assert parser.can_parse(kroger_email) is False
def test_can_parse_meijer_plain_sender(self):
email = EmailReceipt(
sender="receipts@meijer.com",
recipient="shopper@example.com",
subject="Receipt",
body_html="<html></html>",
)
parser = MeijerEmailParser()
assert parser.can_parse(email) is True
def test_cannot_parse_non_meijer(self):
email = EmailReceipt(
sender=" Target <no-reply@target.com>",
recipient="shopper@example.com",
subject="Target Receipt",
body_html="<html></html>",
)
parser = MeijerEmailParser()
assert parser.can_parse(email) is False
class TestParseMeijerReceipt:
def test_receipt_id_extracted(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
result = parser.parse(meijer_email)
assert result["receipt_id"] == "TXN-2026-0315-0042"
def test_purchase_date_extracted(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
result = parser.parse(meijer_email)
assert result["purchase_date"] == "2026-03-15"
def test_items_extracted(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
result = parser.parse(meijer_email)
items = result["items"]
assert len(items) == 8
names = [item["product_name_raw"] for item in items]
assert "ORGANIC BANANAS" in names
assert "WHOLE MILK 1 GAL" in names
assert "GROUND BEEF 85/15 1LB" in names
def test_item_quantities(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
result = parser.parse(meijer_email)
# Find ORGANIC BANANAS
bananas = next(i for i in result["items"] if "BANANAS" in i["product_name_raw"])
assert bananas["quantity"] == Decimal("1")
def test_item_prices(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
result = parser.parse(meijer_email)
# Find ORGANIC BANANAS
bananas = next(i for i in result["items"] if "BANANAS" in i["product_name_raw"])
assert bananas["unit_price"] == Decimal("0.69")
assert bananas["extended_price"] == Decimal("0.69")
def test_totals(self, meijer_email: EmailReceipt):
parser = MeijerEmailParser()
result = parser.parse(meijer_email)
assert result["total"] == Decimal("33.41")
assert result["subtotal"] == Decimal("31.22")
assert result["tax"] == Decimal("2.19")
assert result["savings_total"] == Decimal("3.40")
class TestParseHandlesMissingFields:
def test_missing_body_html_falls_back_to_plain(self):
email = EmailReceipt(
sender="receipts@email.meijer.com",
recipient="shopper@example.com",
subject="Your Meijer Receipt",
body_html=None,
body_plain="TXN-1234 | March 15, 2026 | Total: $10.00",
)
parser = MeijerEmailParser()
result = parser.parse(email)
# Should not raise, returns minimal result
assert result["receipt_id"] == ""
assert result["purchase_date"] == "2026-03-15"
assert result["total"] == Decimal("10.00")
def test_empty_email(self):
email = EmailReceipt(
sender="receipts@email.meijer.com",
recipient="shopper@example.com",
subject="Receipt",
body_html="",
body_plain="",
)
parser = MeijerEmailParser()
result = parser.parse(email)
assert result["receipt_id"] == ""
assert result["purchase_date"] == ""
assert result["total"] == Decimal("0")
assert result["items"] == []
def test_missing_subject_date_from_body(self):
html = """
<html>
<body>
<p>Thank you for shopping on April 1, 2026</p>
<p>Total: $15.00</p>
</body>
</html>
"""
email = EmailReceipt(
sender="receipts@email.meijer.com",
recipient="shopper@example.com",
subject=None,
body_html=html,
)
parser = MeijerEmailParser()
result = parser.parse(email)
assert result["purchase_date"] == "2026-04-01"
def test_missing_totals_defaults_to_zero(self):
html = "<html><body><p>Just an email with no totals</p></body></html>"
email = EmailReceipt(
sender="receipts@email.meijer.com",
recipient="shopper@example.com",
subject="Receipt",
body_html=html,
)
parser = MeijerEmailParser()
result = parser.parse(email)
assert result["total"] == Decimal("0")
assert result["subtotal"] is None
assert result["tax"] is None
@@ -0,0 +1,93 @@
"""Tests for TargetEmailParser."""
from pathlib import Path
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.target import TargetEmailParser
FIXTURE_PATH = Path(__file__).parent.parent.parent / "fixtures" / "target_email_receipt.html"
class TestTargetEmailParser:
"""Tests for TargetEmailParser."""
def setup_method(self) -> None:
self.parser = TargetEmailParser()
self.fixture_html = FIXTURE_PATH.read_text()
def test_can_parse_target_sender(self) -> None:
email = EmailReceipt(
sender="receipts@target.com",
recipient="user@example.com",
subject="Your Target Order Confirmation",
body_html=self.fixture_html,
)
assert self.parser.can_parse(email) is True
def test_can_parse_circle_in_body(self) -> None:
email = EmailReceipt(
sender="someone@unknown.com",
recipient="user@example.com",
subject="Your Receipt",
body_html="<html><body>Target Circle savings offer</body></html>",
)
assert self.parser.can_parse(email) is True
def test_cannot_parse_unrelated(self) -> None:
email = EmailReceipt(
sender="noreply@walmart.com",
recipient="user@example.com",
subject="Your Receipt",
body_html="<html><body>Walmart receipt</body></html>",
)
assert self.parser.can_parse(email) is False
def test_parse_items(self) -> None:
email = EmailReceipt(
sender="orders@target.com",
recipient="user@example.com",
subject="Your Target Order",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
items = result.get("items", [])
assert len(items) >= 3
product_names = [item["product_name_raw"] for item in items]
assert any("Whole Milk" in name for name in product_names)
assert any("Arborio" in name for name in product_names)
for item in items:
assert "unit_price" in item
assert "extended_price" in item
def test_parse_totals(self) -> None:
email = EmailReceipt(
sender="orders@target.com",
recipient="user@example.com",
subject="Your Target Order",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
total = result.get("total", 0)
assert total > 0
def test_parse_receipt_id(self) -> None:
email = EmailReceipt(
sender="orders@target.com",
recipient="user@example.com",
subject="Your Target Order",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
receipt_id = result.get("receipt_id", "")
assert "TGT-2026" in receipt_id or "CNF" in receipt_id
def test_parse_date(self) -> None:
email = EmailReceipt(
sender="orders@target.com",
recipient="user@example.com",
subject="Your Target Order",
body_html=self.fixture_html,
)
result = self.parser.parse(email)
purchase_date = result.get("purchase_date", "")
assert purchase_date == "2026-03-18"
@@ -0,0 +1,79 @@
"""Tests for email queue using DragonflyDB Streams."""
import pytest
from fakeredis import aioredis as fake_aioredis
from receiptwitness.queue.email import (
CONSUMER_GROUP,
STREAM_KEY,
EmailJob,
ack_email,
consume_emails,
enqueue_email,
ensure_consumer_group,
)
@pytest.fixture
async def fake_client():
"""Yield a fake async Redis client."""
client = fake_aioredis.FakeRedis(decode_responses=True)
yield client
await client.aclose()
@pytest.fixture
def sample_job():
"""Sample EmailJob for testing."""
return EmailJob(
user_id="user-123",
sender="no-reply@kroger.com",
recipient="user@example.com",
subject="Kroger Receipt",
body_html="<html><body>Receipt</body></html>",
body_plain="Receipt",
received_at="2026-04-01T12:00:00Z",
message_id="msg-abc-123",
)
@pytest.mark.asyncio
async def test_enqueue_and_consume(fake_client, sample_job):
"""Enqueue a job, consume it, verify fields match."""
msg_id = await enqueue_email(fake_client, sample_job)
assert msg_id is not None
consumed = await consume_emails(fake_client, "test-worker", count=1, block_ms=100)
assert len(consumed) == 1
consumed_id, consumed_job = consumed[0]
assert consumed_id == msg_id
assert consumed_job.user_id == sample_job.user_id
assert consumed_job.sender == sample_job.sender
assert consumed_job.recipient == sample_job.recipient
assert consumed_job.subject == sample_job.subject
assert consumed_job.message_id == sample_job.message_id
@pytest.mark.asyncio
async def test_ack_removes_from_pending(fake_client, sample_job):
"""After ack, message is no longer pending."""
msg_id = await enqueue_email(fake_client, sample_job)
# Consume the message (moves it to pending)
consumed = await consume_emails(fake_client, "test-worker", count=1, block_ms=100)
assert len(consumed) == 1
# Acknowledge it
await ack_email(fake_client, msg_id)
# Check pending count for this consumer group
pending = await fake_client.xpending(STREAM_KEY, CONSUMER_GROUP)
assert pending is None or pending["pending"] == 0
@pytest.mark.asyncio
async def test_ensure_consumer_group_idempotent(fake_client):
"""Calling ensure_consumer_group twice does not error."""
await ensure_consumer_group(fake_client)
# Calling again should not raise
await ensure_consumer_group(fake_client)
@@ -0,0 +1,188 @@
"""Tests for email_worker."""
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fakeredis import aioredis as fake_aioredis
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.queue.email import (
EmailJob,
)
from receiptwitness.worker.email_worker import (
process_job,
resolve_user,
)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
async def fake_redis():
"""Fake async Redis client for queue testing."""
client = fake_aioredis.FakeRedis(decode_responses=True)
yield client
await client.aclose()
@pytest.fixture
def sample_email_job():
"""Sample EmailJob matching DragonflyDB queue schema."""
return EmailJob(
user_id="token-abc-123",
sender="no-reply@meijer.com",
recipient="user@example.com",
subject="Your Meijer Receipt",
body_html="<html><body>Total: $42.00</body></html>",
body_plain="Total: $42.00",
received_at="2026-04-01T12:00:00Z",
message_id="msg-xyz-789",
)
@pytest.fixture
def sample_email():
"""Sample EmailReceipt for parser testing."""
return EmailReceipt(
sender="no-reply@meijer.com",
recipient="user@example.com",
subject="Your Meijer Receipt",
body_html="<html><body>Total: $42.00<br/>Receipt #12345</body></html>",
body_plain="Total: $42.00",
received_at="2026-04-01T12:00:00Z",
)
# ---------------------------------------------------------------------------
# resolve_user tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_resolve_user_valid_token():
"""Valid token returns user_id string."""
mock_session = AsyncMock()
mock_result = MagicMock()
mock_result.scalar_one_or_none.return_value = "user-uuid-42"
mock_session.execute.return_value = mock_result
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
factory = MagicMock(return_value=mock_session)
with patch(
"receiptwitness.worker.email_worker.get_async_session_factory",
return_value=factory,
):
user_id = await resolve_user("token-abc-123")
assert user_id == "user-uuid-42"
factory.assert_called_once()
@pytest.mark.asyncio
async def test_resolve_user_invalid_token():
"""Invalid token returns None."""
mock_session = AsyncMock()
mock_result = MagicMock()
mock_result.scalar_one_or_none.return_value = None
mock_session.execute.return_value = mock_result
mock_session.__aenter__ = AsyncMock(return_value=mock_session)
mock_session.__aexit__ = AsyncMock(return_value=None)
factory = MagicMock(return_value=mock_session)
with patch(
"receiptwitness.worker.email_worker.get_async_session_factory",
return_value=factory,
):
user_id = await resolve_user("bad-token")
assert user_id is None
# ---------------------------------------------------------------------------
# process_job tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_process_job_unknown_retailer(sample_email_job):
"""Unknown retailer logs warning and returns True (ack, no retry)."""
unknown_job = EmailJob(
user_id="token-abc-123",
sender="no-reply@unknownretailer.com",
recipient="user@example.com",
subject="Receipt",
body_html="<html></html>",
body_plain="",
received_at="2026-04-01T12:00:00Z",
message_id="msg-xyz-789",
)
with (
patch(
"receiptwitness.worker.email_worker.resolve_user",
return_value="user-uuid-42",
),
patch(
"receiptwitness.worker.email_worker.publish_receipt_ingested",
new_callable=AsyncMock,
) as mock_publish,
):
result = await process_job("msg-id-1", unknown_job)
assert result is True
mock_publish.assert_not_called()
@pytest.mark.asyncio
async def test_process_job_success(sample_email_job, sample_email):
"""Known retailer: full pipeline runs — parse, normalize, publish event."""
parsed_data = {
"receipt_id": "RCP-999",
"purchase_date": "2026-04-01",
"total": Decimal("42.00"),
"items": [
{
"product_name_raw": "ORGANIC BANANAS",
"quantity": Decimal("1"),
"unit_price": Decimal("0.69"),
"extended_price": Decimal("0.69"),
},
],
}
mock_parser = MagicMock()
mock_parser.parse.return_value = parsed_data
with (
patch(
"receiptwitness.worker.email_worker.resolve_user",
return_value="user-uuid-42",
),
patch.dict(
"receiptwitness.worker.email_worker.PARSERS",
{"meijer": mock_parser},
clear=False,
),
patch(
"receiptwitness.worker.email_worker.publish_receipt_ingested",
new_callable=AsyncMock,
) as mock_publish,
):
result = await process_job("msg-id-1", sample_email_job)
assert result is True
mock_parser.parse.assert_called_once()
mock_publish.assert_called_once_with(
user_id="user-uuid-42",
store_slug="meijer",
purchase_id="RCP-999",
purchase_date="2026-04-01",
item_count=1,
total=Decimal("42.00"),
)
+61
View File
@@ -0,0 +1,61 @@
# seed-dev-job.yaml
# K8s Job to run the CartSnitch seed runner against the dev database.
#
# Usage:
# kubectl apply -f seed-dev-job.yaml -n cartsnitch-dev
#
# To view logs:
# kubectl logs -n cartsnitch-dev job/seed-dev -f
#
# To re-run after fixing issues:
# kubectl delete -f seed-dev-job.yaml -n cartsnitch-dev && kubectl apply -f seed-dev-job.yaml -n cartsnitch-dev
#
apiVersion: batch/v1
kind: Job
metadata:
name: seed-dev
namespace: cartsnitch-dev
labels:
app: cartsnitch
component: seed
environment: dev
annotations:
description: "Runs cartsnitch-common seed runner to populate dev database with realistic test data."
spec:
# Prevent retries — a failed seed run should be investigated, not auto-repeated.
backoffLimit: 0
# Do not run concurrently; sequential runs are safer for truncate+reseed.
concurrencyPolicy: Forbid
template:
metadata:
labels:
app: cartsnitch
component: seed
environment: dev
spec:
restartPolicy: Never
containers:
- name: seed
# Use slim Python image with the cartsnitch-common package installed from git.
# The common repo is public; no additional secret is needed for the pip install.
image: python:3.12-slim
command:
- sh
- -c
- |
pip install --no-cache-dir "cartsnitch-common @ git+https://github.com/cartsnitch/common.git@main" && \
python -m cartsnitch_common.seed --database-url "$${DATABASE_URL}"
env:
- name: DATABASE_URL
valueFrom:
secretKeyRef:
name: cartsnitch-secrets
key: database-url-pg
optional: false
resources:
requests:
cpu: 100m
memory: 256Mi
limits:
cpu: 500m
memory: 512Mi
+104
View File
@@ -0,0 +1,104 @@
#!/usr/bin/env bash
# =============================================================================
# seed-dev.sh — Run the CartSnitch seed runner against the dev database.
#
# Usage:
# ./seed-dev.sh Run full seed against dev
# ./seed-dev.sh --dry-run Show planned record counts without writing
# ./seed-dev.sh --help Show this help
#
# Prerequisites:
# - kubectl configured for the cartsnitch-dev cluster
# - Namespace cartsnitch-dev exists (CNPG Postgres must be running)
#
# What it does:
# 1. Starts a background port-forward to cartsnitch-pg-rw:5432
# 2. Waits for the tunnel to be ready
# 3. Runs python -m cartsnitch_common.seed with --database-url pointing
# to localhost:<forwarded-port>/cartsnitch
# 4. Cleans up the port-forward on exit (normal, interrupt, or error)
# =============================================================================
set -euo pipefail
# --- Config -------------------------------------------------------------------
readonly NAMESPACE="cartsnitch-dev"
readonly SVC_NAME="cartsnitch-pg-rw"
readonly LOCAL_PORT="5433" # use a non-privileged port to avoid conflicts
readonly DB_NAME="cartsnitch"
readonly PG_USER="cartsnitch"
# Retrieve password from the CNPG credentials secret
readonly PG_PASSWORD="$(
kubectl get secret cartsnitch-pg-credentials \
-n "$NAMESPACE" \
-o jsonpath='{.data.password}' \
| base64 -d
)"
readonly DB_URL="postgresql://${PG_USER}:${PG_PASSWORD}@localhost:${LOCAL_PORT}/${DB_NAME}"
# --- Helpers ------------------------------------------------------------------
log() { echo "[seed-dev] $*"; }
fail() { log "ERROR: $*" >&2; exit 1; }
# Cleanup port-forward and exit.
cleanup() {
if [[ -n "${PF_PID:-}" ]]; then
log "Stopping port-forward (PID $PF_PID)..."
kill "$PF_PID" 2>/dev/null || true
wait "$PF_PID" 2>/dev/null || true
fi
}
trap cleanup EXIT
# --- Args ---------------------------------------------------------------------
DRY_RUN=""
HELP_FLAG=""
while [[ $# -gt 0 ]]; do
case "$1" in
--dry-run) DRY_RUN="--dry-run"; shift ;;
--help) HELP_FLAG="1"; shift ;;
*) fail "Unknown argument: $1";;
esac
done
if [[ -n "$HELP_FLAG" ]]; then
sed -n '3,/^# ---/p' "$0" | head -n -1 | sed 's/^# //'
echo ""
echo "Additional arguments are passed through to the seed runner."
echo "Common seed-runner options:"
echo " --dry-run Show planned record counts without writing"
echo " --seed N Set random seed (default: 42)"
exit 0
fi
# --- Prerequisites ------------------------------------------------------------
if ! command -v kubectl &>/dev/null; then
fail "kubectl not found — must be installed and configured."
fi
# --- Port-forward -------------------------------------------------------------
log "Starting port-forward ${SVC_NAME}:5432 -> localhost:${LOCAL_PORT} ..."
kubectl port-forward \
-n "$NAMESPACE" \
svc/"$SVC_NAME" \
"${LOCAL_PORT}:5432" \
&>/dev/null &
PF_PID=$!
# Give the tunnel a moment to establish
sleep 2
# Verify the tunnel is up
if ! kill -0 "$PF_PID" 2>/dev/null; then
fail "Port-forward failed to start."
fi
log "Port-forward active (PID $PF_PID) on localhost:${LOCAL_PORT}"
# --- Seed --------------------------------------------------------------------
log "Running seed against dev database..."
set -x
python -m cartsnitch_common.seed --database-url "$DB_URL" $DRY_RUN
set +x
log "Done."
+2 -2
View File
@@ -35,7 +35,7 @@ export function useProduct(id: string) {
export function usePriceHistory(productId: string) {
return useQuery({
queryKey: ['priceHistory', productId],
queryFn: () => api.get<PriceHistory[]>(`/products/${productId}/price-history`),
queryFn: () => api.get<PriceHistory[]>(`/products/${productId}/prices`),
enabled: !!productId,
})
}
@@ -50,6 +50,6 @@ export function useCoupons() {
export function usePriceAlerts() {
return useQuery({
queryKey: ['priceAlerts'],
queryFn: () => api.get<PriceAlert[]>('/price-alerts'),
queryFn: () => api.get<PriceAlert[]>('/alerts'),
})
}
+2 -2
View File
@@ -15,7 +15,7 @@ const mockRoutes: Record<string, (path: string) => unknown> = {
'/purchases': () => mockPurchases,
'/products': () => mockProducts,
'/coupons': () => mockCoupons,
'/price-alerts': () => mockAlerts,
'/alerts': () => mockAlerts,
}
function matchMockRoute<T>(path: string): T | null {
@@ -30,7 +30,7 @@ function matchMockRoute<T>(path: string): T | null {
}
// /products/:id/price-history
const priceHistoryMatch = path.match(/^\/products\/(.+)\/price-history$/)
const priceHistoryMatch = path.match(/^\/products\/(.+)\/prices$/)
if (priceHistoryMatch) {
return getMockPriceHistory(priceHistoryMatch[1]) as T
}
+3 -31
View File
@@ -1,13 +1,8 @@
import React, { Suspense } from 'react'
import { Link } from 'react-router-dom'
import { authClient } from '../lib/auth-client.ts'
import { usePurchases, usePriceAlerts, usePriceHistory } from '../hooks/useApi.ts'
import { usePurchases, usePriceAlerts } from '../hooks/useApi.ts'
import { StoreIcon } from '../components/StoreIcon.tsx'
const LazySparklineCard = React.lazy(() =>
import('../components/SparklineChart.tsx').then((mod) => ({ default: mod.SparklineCard }))
)
export function Dashboard() {
const { data: session, isPending } = authClient.useSession()
@@ -44,19 +39,11 @@ export function Dashboard() {
function AuthenticatedDashboard({ userName }: { userName: string }) {
const { data: purchases = [], isLoading: purchasesLoading } = usePurchases()
const { data: alerts = [], isLoading: alertsLoading } = usePriceAlerts()
const { data: eggHistory = [] } = usePriceHistory('prod10')
const { data: milkHistory = [] } = usePriceHistory('prod1')
const triggeredAlerts = alerts.filter((a) => a.triggered)
const watchingAlerts = alerts.filter((a) => !a.triggered)
const recentPurchases = purchases.slice(0, 3)
const sparklineData = eggHistory.filter((p) => p.storeId === 'meijer').slice(-8)
const milkSparkline = milkHistory.filter((p) => p.storeId === 'kroger').slice(-8)
const eggCurrent = sparklineData.length > 0 ? `$${sparklineData[sparklineData.length - 1].price.toFixed(2)}` : '—'
const milkCurrent = milkSparkline.length > 0 ? `$${milkSparkline[milkSparkline.length - 1].price.toFixed(2)}` : '—'
if (purchasesLoading || alertsLoading) {
return <DashboardSkeleton />
}
@@ -106,11 +93,8 @@ function AuthenticatedDashboard({ userName }: { userName: string }) {
{/* Price trend sparklines */}
<section className="mt-6">
<h2 className="mb-3 text-lg font-semibold text-gray-700">Price Trends</h2>
<div className="space-y-3">
<Suspense fallback={<SparklinePlaceholder />}>
<LazySparklineCard label="Eggs (dozen)" data={sparklineData} current={eggCurrent} />
<LazySparklineCard label="Whole Milk (1 gal)" data={milkSparkline} current={milkCurrent} />
</Suspense>
<div className="rounded-xl bg-white p-4 shadow-sm text-center text-sm text-gray-400">
Connect a store to see price trends
</div>
</section>
@@ -187,15 +171,3 @@ function DashboardSkeleton() {
</div>
)
}
function SparklinePlaceholder() {
return (
<div className="flex items-center gap-4 rounded-xl bg-white p-4 shadow-sm animate-pulse">
<div className="min-w-0 flex-1">
<div className="h-4 w-24 rounded bg-gray-200" />
<div className="mt-2 h-6 w-16 rounded bg-gray-200" />
</div>
<div className="h-10 w-24 rounded bg-gray-100" />
</div>
)
}
+45
View File
@@ -1,3 +1,4 @@
import { useState, useEffect } from 'react'
import { Link, useNavigate } from 'react-router-dom'
import { authClient } from '../lib/auth-client.ts'
import { useAuthStore } from '../stores/auth.ts'
@@ -9,6 +10,26 @@ export function Settings() {
const setAuthenticated = useAuthStore((s) => s.setAuthenticated)
const navigate = useNavigate()
const { theme, setTheme } = useThemeStore()
const [emailInAddress, setEmailInAddress] = useState<string | null>(null)
const [copied, setCopied] = useState(false)
useEffect(() => {
if (!session?.user) return
fetch('/api/v1/me/email-in-address', {
credentials: 'include',
})
.then((res) => res.json())
.then((data) => setEmailInAddress(data.email_address))
.catch(() => setEmailInAddress(null))
}, [session])
async function handleCopyEmail() {
if (emailInAddress) {
await navigator.clipboard.writeText(emailInAddress)
setCopied(true)
setTimeout(() => setCopied(false), 2000)
}
}
const user = session?.user
const connectedStores: string[] = []
@@ -113,6 +134,30 @@ export function Settings() {
</button>
</div>
</section>
{/* Receipt Email section */}
<section className="mt-6">
<h2 className="mb-3 text-sm font-semibold text-gray-500">Receipt Email</h2>
<div className="rounded-xl bg-white p-4 shadow-sm">
<p className="mb-2 text-sm text-gray-600">
Forward your digital receipt emails to this address:
</p>
<div className="flex items-center gap-2">
<code className="flex-1 rounded-lg bg-gray-100 px-3 py-2 text-sm font-mono text-gray-800 truncate">
{emailInAddress ?? 'Loading...'}
</code>
<button
onClick={handleCopyEmail}
className="rounded-lg bg-brand-blue px-3 py-2 text-sm font-medium text-white hover:bg-brand-blue/90 transition-colors"
>
{copied ? 'Copied!' : 'Copy'}
</button>
</div>
<p className="mt-2 text-xs text-gray-400">
Supports Meijer, Kroger, and Target receipt emails.
</p>
</div>
</section>
</div>
)
}
+1 -1
View File
@@ -61,5 +61,5 @@ export const handlers = [
http.get('/api/v1/products', () => HttpResponse.json(mockProducts)),
http.get('/api/v1/products/prod_1', () => HttpResponse.json(mockProducts[0])),
http.get('/api/v1/coupons', () => HttpResponse.json(mockCoupons)),
http.get('/api/v1/price-alerts', () => HttpResponse.json(mockAlerts)),
http.get('/api/v1/alerts', () => HttpResponse.json(mockAlerts)),
]