Compare commits

..

2 Commits

Author SHA1 Message Date
CartSnitch Engineer Bot e151873bb3 Merge main into fix/restore-token-hash
Sync with upstream changes (frontend API route alignment) while
preserving the SHA-256 token hashing fix.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 10:22:52 +00:00
CartSnitch Engineer Bot 3f9c683522 fix(api): restore SHA-256 session token hashing (regression from PR #95)
Better-Auth v1.5.6+ stores tokens as SHA-256 hashes in the sessions
table. The raw cookie value must be hashed before querying so that
stored-hash == computed-hash, restoring auth on all data endpoints.

Also adopts SESSION_COOKIE_NAMES list from PR #95 so both pending PRs
(cookie fix and hash fix) can merge without conflict.

Fixes CAR-322. Regression from PR #95 (fix/secure-cookie-name).

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-04-01 08:39:20 +00:00
78 changed files with 880 additions and 4381 deletions
+32 -275
View File
@@ -2,9 +2,9 @@ name: CI
on:
push:
branches: [main, dev, uat]
branches: [main]
pull_request:
branches: [main, dev, uat]
branches: [main]
concurrency:
group: ci-${{ github.ref }}
@@ -13,7 +13,6 @@ concurrency:
permissions:
contents: write
packages: write
security-events: write
env:
REGISTRY: ghcr.io
@@ -100,11 +99,10 @@ jobs:
build-and-push:
runs-on: runners-cartsnitch
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
needs: [lint, test, e2e]
outputs:
calver_tag: ${{ steps.calver.outputs.version }}
sha_tag: sha-${{ github.sha }}
steps:
- uses: actions/checkout@v4
with:
@@ -128,14 +126,14 @@ jobs:
echo "CalVer tag: $VERSION"
- name: Log in to Docker Hub
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Log in to GHCR
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
@@ -148,46 +146,21 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
tags: |
type=sha,prefix=sha-,format=long
type=sha,prefix=sha-
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build Docker image
- name: Build and push Docker image
uses: docker/build-push-action@v6
with:
context: .
load: true
push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
target: prod
cache-from: type=gha
cache-to: type=gha,mode=max
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-frontend.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-frontend.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: .
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
- name: Create git tag
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
run: |
@@ -196,11 +169,10 @@ jobs:
build-and-push-auth:
runs-on: runners-cartsnitch
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
needs: [lint, test, e2e]
outputs:
calver_tag: ${{ steps.calver.outputs.version }}
sha_tag: sha-${{ github.sha }}
steps:
- uses: actions/checkout@v4
with:
@@ -223,14 +195,14 @@ jobs:
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
- name: Log in to Docker Hub
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Log in to GHCR
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
@@ -243,52 +215,25 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.AUTH_IMAGE_NAME }}
tags: |
type=sha,prefix=sha-,format=long
type=sha,prefix=sha-
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build Docker image
- name: Build and push auth Docker image
uses: docker/build-push-action@v6
with:
context: ./auth
file: ./auth/Dockerfile
load: true
push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.AUTH_IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-auth.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-auth.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: ./auth
file: ./auth/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
build-and-push-receiptwitness:
runs-on: runners-cartsnitch
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
needs: [lint, test]
outputs:
calver_tag: ${{ steps.calver.outputs.version }}
sha_tag: sha-${{ github.sha }}
steps:
- uses: actions/checkout@v4
with:
@@ -306,14 +251,14 @@ jobs:
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
- name: Log in to Docker Hub
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Log in to GHCR
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
@@ -326,52 +271,25 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.RECEIPTWITNESS_IMAGE_NAME }}
tags: |
type=sha,prefix=sha-,format=long
type=sha,prefix=sha-
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build Docker image
- name: Build and push receiptwitness image
uses: docker/build-push-action@v6
with:
context: .
file: ./receiptwitness/Dockerfile
load: true
push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.RECEIPTWITNESS_IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-receiptwitness.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-receiptwitness.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: .
file: ./receiptwitness/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
build-and-push-api:
runs-on: runners-cartsnitch
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
needs: [lint, test]
outputs:
calver_tag: ${{ steps.calver.outputs.version }}
sha_tag: sha-${{ github.sha }}
steps:
- uses: actions/checkout@v4
with:
@@ -389,14 +307,14 @@ jobs:
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
- name: Log in to Docker Hub
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Log in to GHCR
if: github.event_name == 'push'
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
@@ -409,49 +327,23 @@ jobs:
with:
images: ${{ env.REGISTRY }}/${{ env.API_IMAGE_NAME }}
tags: |
type=sha,prefix=sha-,format=long
type=sha,prefix=sha-
type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}
- name: Build Docker image
- name: Build and push API Docker image
uses: docker/build-push-action@v6
with:
context: ./api
context: .
file: ./api/Dockerfile
load: true
push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@0.28.0
with:
image-ref: ${{ env.REGISTRY }}/${{ env.API_IMAGE_NAME }}:sha-${{ github.sha }}
format: 'sarif'
output: 'trivy-results-api.sarif'
severity: 'CRITICAL,HIGH'
exit-code: '1'
- name: Upload Trivy scan results to GitHub Security
uses: github/codeql-action/upload-sarif@v3
if: always()
with:
sarif_file: 'trivy-results-api.sarif'
- name: Push Docker image
if: github.event_name == 'push'
uses: docker/build-push-action@v6
with:
context: ./api
file: ./api/Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
cache-from: type=gha
deploy-dev:
runs-on: runners-cartsnitch
needs: [build-and-push, build-and-push-auth, build-and-push-receiptwitness, build-and-push-api]
if: always() && !cancelled() && github.event_name == 'push' && (github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main')
if: always() && !cancelled() && github.event_name == 'push' && github.ref == 'refs/heads/main'
steps:
- name: Generate GitHub App token
id: app-token
@@ -476,65 +368,29 @@ jobs:
- name: Install kustomize
uses: imranismail/setup-kustomize@v2
- name: Determine image tag for frontend
id: frontend_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
- name: Update frontend image tag
if: needs.build-and-push.result == 'success'
run: |
cd infra/apps/overlays/dev
kustomize edit set image ghcr.io/cartsnitch/cartsnitch:${{ steps.frontend_tag.outputs.tag }}
- name: Determine image tag for auth
id: auth_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push-auth.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push-auth.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
kustomize edit set image ghcr.io/cartsnitch/cartsnitch:${{ needs.build-and-push.outputs.calver_tag }}
- name: Update auth image tag
if: needs.build-and-push-auth.result == 'success'
run: |
cd infra/apps/overlays/dev
kustomize edit set image ghcr.io/cartsnitch/auth:${{ steps.auth_tag.outputs.tag }}
- name: Determine image tag for receiptwitness
id: receiptwitness_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push-receiptwitness.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push-receiptwitness.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
kustomize edit set image ghcr.io/cartsnitch/auth:${{ needs.build-and-push-auth.outputs.calver_tag }}
- name: Update receiptwitness image tag
if: needs.build-and-push-receiptwitness.result == 'success'
run: |
cd infra/apps/overlays/dev
kustomize edit set image ghcr.io/cartsnitch/receiptwitness:${{ steps.receiptwitness_tag.outputs.tag }}
- name: Determine image tag for api
id: api_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push-api.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push-api.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
kustomize edit set image ghcr.io/cartsnitch/receiptwitness:${{ needs.build-and-push-receiptwitness.outputs.calver_tag }}
- name: Update api image tag
if: needs.build-and-push-api.result == 'success'
run: |
cd infra/apps/overlays/dev
kustomize edit set image ghcr.io/cartsnitch/api:${{ steps.api_tag.outputs.tag }}
kustomize edit set image ghcr.io/cartsnitch/api:${{ needs.build-and-push-api.outputs.calver_tag }}
- name: Commit and push to infra
run: |
@@ -543,103 +399,4 @@ jobs:
git config user.email "cartsnitch-ci[bot]@users.noreply.github.com"
git add apps/overlays/dev/kustomization.yaml
git commit -m "ci(dev): update cartsnitch, auth, receiptwitness, and api images"
git pull --rebase origin main
git push origin main
deploy-uat:
runs-on: runners-cartsnitch
needs: [build-and-push, build-and-push-auth, build-and-push-receiptwitness, build-and-push-api]
if: always() && !cancelled() && github.event_name == 'push' && (github.ref == 'refs/heads/uat' || github.ref == 'refs/heads/main')
steps:
- name: Generate GitHub App token
id: app-token
uses: actions/create-github-app-token@v1
with:
app-id: ${{ secrets.CARTSNITCH_APP_ID }}
private-key: ${{ secrets.CARTSNITCH_APP_PRIVATE_KEY }}
owner: ${{ github.repository_owner }}
repositories: infra
- name: Checkout infra repo
uses: actions/checkout@v4
with:
repository: cartsnitch/infra
token: ${{ steps.app-token.outputs.token }}
ref: main
path: infra
- name: Install kubectl
uses: azure/setup-kubectl@v4
- name: Install kustomize
uses: imranismail/setup-kustomize@v2
- name: Determine image tag for frontend
id: frontend_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
- name: Update frontend image tag
if: needs.build-and-push.result == 'success'
run: |
cd infra/apps/overlays/uat
kustomize edit set image ghcr.io/cartsnitch/cartsnitch:${{ steps.frontend_tag.outputs.tag }}
- name: Determine image tag for auth
id: auth_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push-auth.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push-auth.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
- name: Update auth image tag
if: needs.build-and-push-auth.result == 'success'
run: |
cd infra/apps/overlays/uat
kustomize edit set image ghcr.io/cartsnitch/auth:${{ steps.auth_tag.outputs.tag }}
- name: Determine image tag for receiptwitness
id: receiptwitness_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push-receiptwitness.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push-receiptwitness.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
- name: Update receiptwitness image tag
if: needs.build-and-push-receiptwitness.result == 'success'
run: |
cd infra/apps/overlays/uat
kustomize edit set image ghcr.io/cartsnitch/receiptwitness:${{ steps.receiptwitness_tag.outputs.tag }}
- name: Determine image tag for api
id: api_tag
run: |
if [ "${{ github.ref }}" == "refs/heads/main" ]; then
echo "tag=${{ needs.build-and-push-api.outputs.calver_tag }}" >> "$GITHUB_OUTPUT"
else
echo "tag=${{ needs.build-and-push-api.outputs.sha_tag }}" >> "$GITHUB_OUTPUT"
fi
- name: Update api image tag
if: needs.build-and-push-api.result == 'success'
run: |
cd infra/apps/overlays/uat
kustomize edit set image ghcr.io/cartsnitch/api:${{ steps.api_tag.outputs.tag }}
- name: Commit and push to infra
run: |
cd infra
git config user.name "cartsnitch-ci[bot]"
git config user.email "cartsnitch-ci[bot]@users.noreply.github.com"
git add apps/overlays/uat/kustomization.yaml
git commit -m "ci(uat): update cartsnitch, auth, receiptwitness, and api images"
git pull --rebase origin main
git push origin main
-164
View File
@@ -1,164 +0,0 @@
# CI workflow for the cartsnitch/api image.
#
# On every push or pull request targeting main: lint, type-check, and test the
# Python package, then build the Docker image. Only on a push to main is the
# image pushed to GHCR and a CalVer git tag created.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# One run per ref; a newer run on the same ref cancels the older one.
concurrency:
  group: ci-${{ github.ref }}
  cancel-in-progress: true

# Least privilege by default. Only build-and-push needs write access (see its
# job-level permissions); lint, typecheck, and test just read the repository.
permissions:
  contents: read

env:
  REGISTRY: ghcr.io
  IMAGE_NAME: cartsnitch/api

jobs:
  lint:
    runs-on: runners-cartsnitch
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip
      - run: pip install ruff
      - name: Ruff lint
        run: ruff check .
      - name: Ruff format check
        run: ruff format --check .

  typecheck:
    runs-on: runners-cartsnitch
    # Advisory only: a failing type check does not fail the workflow.
    continue-on-error: true
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip
      - name: Install system dependencies
        run: sudo apt-get update && sudo apt-get install -y libpq-dev build-essential
      - name: Install cartsnitch-common from GitHub
        run: pip install "cartsnitch-common @ git+https://github.com/cartsnitch/common.git"
      - run: pip install -e ".[dev]" mypy
      - name: Type check
        run: mypy src/cartsnitch_api

  test:
    runs-on: runners-cartsnitch
    # Service containers are pulled with Docker Hub credentials and exposed on
    # localhost; the URLs in `env` below point at these mapped ports.
    services:
      postgres:
        image: postgres:15-alpine
        credentials:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
        env:
          POSTGRES_USER: cartsnitch
          POSTGRES_PASSWORD: cartsnitch_test
          POSTGRES_DB: cartsnitch_test
        ports:
          - "5432:5432"
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
      redis:
        image: redis:7-alpine
        credentials:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}
        ports:
          - "6379:6379"
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
    env:
      CARTSNITCH_DATABASE_URL: postgresql+asyncpg://cartsnitch:cartsnitch_test@localhost:5432/cartsnitch_test
      CARTSNITCH_REDIS_URL: redis://localhost:6379/0
      # Test-only value; not a real secret.
      CARTSNITCH_JWT_SECRET_KEY: test-secret-do-not-use-in-prod
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip
      - name: Install system dependencies
        run: sudo apt-get update && sudo apt-get install -y libpq-dev build-essential
      - name: Install cartsnitch-common from GitHub
        run: pip install "cartsnitch-common @ git+https://github.com/cartsnitch/common.git"
      - run: pip install -e ".[dev]"
      - name: Run tests
        run: pytest --tb=short -q

  build-and-push:
    runs-on: runners-cartsnitch
    needs: [lint, test]
    # Write scopes live here only: `contents` to push the release tag,
    # `packages` to push the image to GHCR.
    permissions:
      contents: write
      packages: write
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so existing tags are visible to the CalVer step.
          fetch-depth: 0

      - name: Generate CalVer tag
        id: calver
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        # First build of a UTC day is YYYY.MM.DD; later builds that day are
        # YYYY.MM.DD.N, with N starting at 2 and incrementing from the highest
        # existing tag.
        run: |
          DATE_TAG=$(date -u +%Y.%m.%d)
          EXISTING=$(git tag -l "v${DATE_TAG}*" | sort -V | tail -1)
          if [ -z "$EXISTING" ]; then
            VERSION="$DATE_TAG"
          elif [ "$EXISTING" = "v${DATE_TAG}" ]; then
            VERSION="${DATE_TAG}.2"
          else
            BUILD_NUM=$(echo "$EXISTING" | sed "s/v${DATE_TAG}\.//")
            VERSION="${DATE_TAG}.$((BUILD_NUM + 1))"
          fi
          echo "version=$VERSION" >> "$GITHUB_OUTPUT"
          echo "CalVer tag: $VERSION"

      # Runs on every event, including pull requests.
      # NOTE(review): presumably for authenticated base-image pulls - confirm.
      - name: Log in to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Log in to GHCR
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        uses: docker/login-action@v3
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
          # The CalVer and `latest` tags are only enabled on main; the sha tag
          # is always produced.
          tags: |
            type=sha,prefix=sha-
            type=raw,value=${{ steps.calver.outputs.version }},enable=${{ github.ref == 'refs/heads/main' }}
            type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          # Build always; push only for pushes to main.
          push: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          target: prod

      - name: Create git tag
        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
        env:
          # Passed through the environment rather than interpolated into the
          # script text, so the value is never parsed as shell source.
          CALVER_VERSION: ${{ steps.calver.outputs.version }}
        run: |
          git tag "v${CALVER_VERSION}"
          git push origin "v${CALVER_VERSION}"
+9 -6
View File
@@ -1,3 +1,5 @@
# Stage 1: Build dependencies
# Build context is the repo root. Paths below are relative to the root.
FROM python:3.12-slim AS build
RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -6,10 +8,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY pyproject.toml ./
COPY src/ ./src/
COPY api/pyproject.toml ./
COPY api/src/ ./src/
RUN pip install --no-cache-dir --prefix=/install .
# Stage 2: Production image
FROM python:3.12-slim AS prod
RUN apt-get update && apt-get install -y --no-install-recommends libpq5 && rm -rf /var/lib/apt/lists/*
@@ -17,9 +20,9 @@ RUN apt-get update && apt-get install -y --no-install-recommends libpq5 && rm -r
WORKDIR /app
RUN adduser --system --group --uid 1000 app
COPY --from=build /install /usr/local
COPY src/ ./src/
COPY alembic.ini ./
COPY alembic/ ./alembic/
COPY api/src/ ./src/
COPY api/alembic.ini ./
COPY api/alembic/ ./alembic/
USER 1000
EXPOSE 8000
@@ -27,4 +30,4 @@ EXPOSE 8000
HEALTHCHECK --interval=30s --timeout=3s \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
CMD ["sh", "-c", "python -m alembic upgrade head && uvicorn cartsnitch_api.main:app --host 0.0.0.0 --port 8000"]
CMD ["sh", "-c", "python -m alembic upgrade head && uvicorn cartsnitch_api.main:app --host 0.0.0.0 --port 8000"]
+2 -14
View File
@@ -18,7 +18,7 @@ if not db_url:
"CARTSNITCH_DATABASE_URL_SYNC must be set. "
"Example: postgresql://user:pass@localhost:5432/cartsnitch"
)
config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%"))
config.set_main_option("sqlalchemy.url", db_url)
target_metadata = Base.metadata
@@ -31,7 +31,6 @@ def run_migrations_offline() -> None:
target_metadata=target_metadata,
literal_binds=True,
dialect_opts={"paramstyle": "named"},
version_table_column_width=128,
)
with context.begin_transaction():
context.run_migrations()
@@ -45,20 +44,9 @@ def run_migrations_online() -> None:
poolclass=pool.NullPool,
)
with connectable.connect() as connection:
context.configure(connection=connection, target_metadata=target_metadata, version_table_column_width=128)
context.configure(connection=connection, target_metadata=target_metadata)
with context.begin_transaction():
context.run_migrations()
# Create any tables defined in models but not yet created by migrations.
# This bootstraps fresh databases that have no legacy schema.
# checkfirst=True ensures this is a no-op on existing databases.
try:
Base.metadata.create_all(bind=connection, checkfirst=True)
connection.commit()
except Exception as exc:
import logging
logging.getLogger("alembic.env").warning(
"create_all failed (non-fatal, migrations should handle table creation): %s", exc
)
if context.is_offline_mode():
@@ -33,21 +33,6 @@ def _is_fernet_token(value: str) -> bool:
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
# Fresh DB — table created by Base.metadata.create_all with correct TEXT type
if not inspector.has_table("user_store_accounts"):
return
# Already migrated? Skip if session_data is already TEXT (not JSON)
cols = {c["name"]: c for c in inspector.get_columns("user_store_accounts")}
if "session_data" not in cols:
return
col_type = str(cols["session_data"]["type"]).lower()
if "text" in col_type and "json" not in col_type:
return # already TEXT — nothing to do
# Change column type from JSON to TEXT to hold Fernet ciphertext
op.alter_column(
"user_store_accounts",
@@ -58,6 +43,7 @@ def upgrade() -> None:
postgresql_using="session_data::text",
)
conn = op.get_bind()
rows = conn.execute(
text("SELECT id, session_data FROM user_store_accounts WHERE session_data IS NOT NULL")
).fetchall()
+65 -78
View File
@@ -21,94 +21,81 @@ depends_on = None
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
# --- Extend users table for Better-Auth compatibility ---
# Guard: on a fresh DB Base.metadata.create_all (called in env.py after migrations)
# creates the users table with all columns, so migration 002 must not re-run add_column.
if inspector.has_table("users"):
existing_user_cols = [c["name"] for c in inspector.get_columns("users")]
if "email_verified" not in existing_user_cols:
op.add_column("users", sa.Column("email_verified", sa.Boolean(), nullable=False, server_default="false"))
if "image" not in existing_user_cols:
op.add_column("users", sa.Column("image", sa.Text(), nullable=True))
op.add_column("users", sa.Column("email_verified", sa.Boolean(), nullable=False, server_default="false"))
op.add_column("users", sa.Column("image", sa.Text(), nullable=True))
# --- Create sessions table ---
if not inspector.has_table("sessions"):
op.create_table(
"sessions",
sa.Column("id", sa.Text(), nullable=False),
sa.Column("token", sa.Text(), nullable=False),
sa.Column("user_id", sa.Text(), nullable=False),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("ip_address", sa.Text(), nullable=True),
sa.Column("user_agent", sa.Text(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_index("ix_sessions_token", "sessions", ["token"], unique=True)
op.create_index("ix_sessions_user_id", "sessions", ["user_id"])
op.create_table(
"sessions",
sa.Column("id", sa.Text(), nullable=False),
sa.Column("token", sa.Text(), nullable=False),
sa.Column("user_id", sa.Text(), nullable=False),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("ip_address", sa.Text(), nullable=True),
sa.Column("user_agent", sa.Text(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_index("ix_sessions_token", "sessions", ["token"], unique=True)
op.create_index("ix_sessions_user_id", "sessions", ["user_id"])
# --- Create accounts table ---
if not inspector.has_table("accounts"):
op.create_table(
"accounts",
sa.Column("id", sa.Text(), nullable=False),
sa.Column("user_id", sa.Text(), nullable=False),
sa.Column("account_id", sa.Text(), nullable=False),
sa.Column("provider_id", sa.Text(), nullable=False),
sa.Column("access_token", sa.Text(), nullable=True),
sa.Column("refresh_token", sa.Text(), nullable=True),
sa.Column("access_token_expires_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("refresh_token_expires_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("scope", sa.Text(), nullable=True),
sa.Column("id_token", sa.Text(), nullable=True),
sa.Column("password", sa.Text(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_index("ix_accounts_user_id", "accounts", ["user_id"])
op.create_table(
"accounts",
sa.Column("id", sa.Text(), nullable=False),
sa.Column("user_id", sa.Text(), nullable=False),
sa.Column("account_id", sa.Text(), nullable=False),
sa.Column("provider_id", sa.Text(), nullable=False),
sa.Column("access_token", sa.Text(), nullable=True),
sa.Column("refresh_token", sa.Text(), nullable=True),
sa.Column("access_token_expires_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("refresh_token_expires_at", sa.DateTime(timezone=True), nullable=True),
sa.Column("scope", sa.Text(), nullable=True),
sa.Column("id_token", sa.Text(), nullable=True),
sa.Column("password", sa.Text(), nullable=True),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_index("ix_accounts_user_id", "accounts", ["user_id"])
# --- Create verifications table ---
if not inspector.has_table("verifications"):
op.create_table(
"verifications",
sa.Column("id", sa.Text(), nullable=False),
sa.Column("identifier", sa.Text(), nullable=False),
sa.Column("value", sa.Text(), nullable=False),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
op.create_table(
"verifications",
sa.Column("id", sa.Text(), nullable=False),
sa.Column("identifier", sa.Text(), nullable=False),
sa.Column("value", sa.Text(), nullable=False),
sa.Column("expires_at", sa.DateTime(timezone=True), nullable=False),
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
sa.PrimaryKeyConstraint("id"),
)
# --- Migrate existing password hashes to accounts table ---
# Only run on existing (non-fresh) DBs that already have users table with data
if inspector.has_table("users"):
users = conn.execute(
text("SELECT id, hashed_password FROM users WHERE hashed_password IS NOT NULL")
).fetchall()
# For each user with a hashed_password, create a 'credential' account row
conn = op.get_bind()
users = conn.execute(
text("SELECT id, hashed_password FROM users WHERE hashed_password IS NOT NULL")
).fetchall()
for user_id, hashed_password in users:
user_id_str = str(user_id)
conn.execute(
text(
"INSERT INTO accounts (id, user_id, account_id, provider_id, password, created_at, updated_at) "
"VALUES (gen_random_uuid()::text, :user_id, :account_id, 'credential', :password, now(), now())"
),
{"user_id": user_id_str, "account_id": user_id_str, "password": hashed_password},
)
for user_id, hashed_password in users:
user_id_str = str(user_id)
conn.execute(
text(
"INSERT INTO accounts (id, user_id, account_id, provider_id, password, created_at, updated_at) "
"VALUES (gen_random_uuid()::text, :user_id, :account_id, 'credential', :password, now(), now())"
),
{"user_id": user_id_str, "account_id": user_id_str, "password": hashed_password},
)
def downgrade() -> None:
op.execute(text("DROP INDEX IF EXISTS ix_accounts_user_id"))
op.execute(text("DROP TABLE IF EXISTS verifications"))
op.execute(text("DROP TABLE IF EXISTS accounts"))
op.execute(text("DROP INDEX IF EXISTS ix_sessions_user_id"))
op.execute(text("DROP INDEX IF EXISTS ix_sessions_token"))
op.execute(text("DROP TABLE IF EXISTS sessions"))
op.execute(text("ALTER TABLE users DROP COLUMN IF EXISTS image"))
op.execute(text("ALTER TABLE users DROP COLUMN IF EXISTS email_verified"))
op.drop_table("verifications")
op.drop_table("accounts")
op.drop_index("ix_sessions_user_id", table_name="sessions")
op.drop_index("ix_sessions_token", table_name="sessions")
op.drop_table("sessions")
op.drop_column("users", "image")
op.drop_column("users", "email_verified")
@@ -19,25 +19,8 @@ depends_on = None
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
# Fresh DB — nothing to alter
if not inspector.has_table("users"):
return
cols = {c["name"]: c for c in inspector.get_columns("users")}
if "hashed_password" in cols and not cols["hashed_password"]["nullable"]:
op.alter_column("users", "hashed_password", existing_type=sa.String(255), nullable=True)
op.alter_column("users", "hashed_password", existing_type=sa.String(255), nullable=True)
def downgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
if not inspector.has_table("users"):
return
cols = {c["name"]: c for c in inspector.get_columns("users")}
if "hashed_password" in cols and cols["hashed_password"]["nullable"]:
op.alter_column("users", "hashed_password", existing_type=sa.String(255), nullable=False)
op.alter_column("users", "hashed_password", existing_type=sa.String(255), nullable=False)
+1 -15
View File
@@ -25,21 +25,7 @@ depends_on = None
def upgrade() -> None:
conn = op.get_bind()
inspector = sa.inspect(conn)
# Fresh DB — no tables yet, nothing to convert
if not inspector.has_table("users"):
return
# Check if already TEXT (Base.metadata.create_all uses TEXT for fresh DB)
users_cols = {c["name"]: c for c in inspector.get_columns("users")}
if "id" in users_cols:
id_type = str(users_cols["id"]["type"]).lower()
if "text" in id_type and "uuid" not in id_type:
return # already TEXT — nothing to do
# Step 1: Drop existing FK constraints (ignore if they don't exist)
# Step 1: Drop existing FK constraints
op.execute(text("ALTER TABLE user_store_accounts DROP CONSTRAINT IF EXISTS user_store_accounts_user_id_fkey"))
op.execute(text("ALTER TABLE purchases DROP CONSTRAINT IF EXISTS purchases_user_id_fkey"))
@@ -1,57 +0,0 @@
"""Add email_inbound_token to users.
Revision ID: 005_add_email_inbound_token
Revises: 004_fix_user_id_text
Create Date: 2026-04-02
"""
import secrets
import sqlalchemy as sa
from alembic import op
revision = "005_add_email_inbound_token"
down_revision = "004_fix_user_id_text"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Add ``users.email_inbound_token``, backfill it, then constrain it.

    Does nothing when the ``users`` table is absent or already carries the
    column. Otherwise the column is added as nullable, every row whose token
    is NULL receives its own ``secrets.token_urlsafe(16)`` value, and finally
    the column is made NOT NULL and covered by a unique index.
    """
    bind = op.get_bind()
    insp = sa.inspect(bind)

    # Nothing to migrate if the table is missing or the column is already there.
    if not insp.has_table("users"):
        return
    present = {col["name"] for col in insp.get_columns("users")}
    if "email_inbound_token" in present:
        return

    # Start nullable: existing rows have no token until the backfill below runs.
    token_column = sa.Column("email_inbound_token", sa.String(22), nullable=True)
    op.add_column("users", token_column)

    # One independently generated token per pre-existing user.
    pending = bind.execute(sa.text("SELECT id FROM users WHERE email_inbound_token IS NULL"))
    for row in pending:
        bind.execute(
            sa.text("UPDATE users SET email_inbound_token = :token WHERE id = :id"),
            {"token": secrets.token_urlsafe(16), "id": row[0]},
        )

    # Every row is populated now, so the constraints can be switched on.
    op.alter_column("users", "email_inbound_token", nullable=False)
    op.create_index(
        "ix_users_email_inbound_token",
        "users",
        ["email_inbound_token"],
        unique=True,
    )
def downgrade() -> None:
    """Remove the ``email_inbound_token`` index and column from ``users``.

    ``upgrade()`` returns early when the ``users`` table is missing or the
    column already exists, so this revision may never have created the
    ``ix_users_email_inbound_token`` index. Each step is therefore guarded:
    the index and the column are dropped only if the inspector reports them,
    and nothing is attempted when the table itself is absent. This keeps the
    downgrade from failing on objects it cannot find.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)

    if not inspector.has_table("users"):
        return

    index_names = {ix["name"] for ix in inspector.get_indexes("users")}
    if "ix_users_email_inbound_token" in index_names:
        op.drop_index("ix_users_email_inbound_token", table_name="users")

    column_names = {c["name"] for c in inspector.get_columns("users")}
    if "email_inbound_token" in column_names:
        op.drop_column("users", "email_inbound_token")
@@ -1,42 +0,0 @@
"""Add server_default to users.email_inbound_token.
Revision ID: 006_email_inbound_token_server_default
Revises: 005_add_email_inbound_token
Create Date: 2026-04-04
"""
import sqlalchemy as sa
from alembic import op
revision = "006_email_inbound_token_server_default"
down_revision = "005_add_email_inbound_token"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Attach a server-side default to ``users.email_inbound_token``.

    Skipped when the ``users`` table is missing, when the column is missing,
    or when the column already reports a default. The default expression
    base64-encodes 16 random bytes, strips the trailing ``=`` padding and
    swaps ``+``/``/`` for ``-``/``_``.
    """
    bind = op.get_bind()
    insp = sa.inspect(bind)

    if not insp.has_table("users"):
        return

    token_col = next(
        (col for col in insp.get_columns("users") if col["name"] == "email_inbound_token"),
        None,
    )
    if token_col is None:
        return
    if token_col.get("default") is not None:
        return

    # NOTE(review): gen_random_bytes() is provided by the pgcrypto extension —
    # confirm it is installed on every target database.
    generated_token = sa.text(
        "replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_')"
    )
    op.alter_column("users", "email_inbound_token", server_default=generated_token)
def downgrade() -> None:
    """Remove the server-side default from ``users.email_inbound_token``.

    ``upgrade()`` tolerates a missing ``users`` table or a missing column by
    returning early; the downgrade applies the same guards so that it does
    not issue an ALTER against a table or column that does not exist.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)

    if not inspector.has_table("users"):
        return

    column_names = {c["name"] for c in inspector.get_columns("users")}
    if "email_inbound_token" not in column_names:
        return

    op.alter_column(
        "users",
        "email_inbound_token",
        server_default=None,
    )
@@ -1,47 +0,0 @@
"""Bootstrap users table on fresh databases.
On fresh databases, migrations 001-006 skip users-table operations because
the table does not exist yet. Base.metadata.create_all() in env.py is meant
to handle this, but if it fails (import errors, etc.) the table is never
created. This migration creates the users table with raw SQL as a safety net.
Revision ID: 007_bootstrap_users_table
Revises: 006_email_inbound_token_server_default
Create Date: 2026-04-04
"""
import sqlalchemy as sa
from sqlalchemy import text
from alembic import op
revision = "007_bootstrap_users_table"
down_revision = "006_email_inbound_token_server_default"
branch_labels = None
depends_on = None
def upgrade() -> None:
    """Create the ``users`` table with raw SQL when it does not exist yet.

    If the inspector already sees a ``users`` table the function returns
    without touching it.
    """
    bind = op.get_bind()
    if sa.inspect(bind).has_table("users"):
        # Table is already there; leave it exactly as it is.
        return

    # The SQL text is kept verbatim; whitespace inside it is irrelevant to the
    # database.
    create_users = text("""
CREATE TABLE users (
id TEXT PRIMARY KEY,
email VARCHAR(255) NOT NULL UNIQUE,
hashed_password VARCHAR(255),
display_name VARCHAR(100),
email_verified BOOLEAN NOT NULL DEFAULT false,
image TEXT,
email_inbound_token VARCHAR(22) NOT NULL UNIQUE
DEFAULT replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_'),
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
updated_at TIMESTAMPTZ NOT NULL DEFAULT now()
)
""")
    bind.execute(create_users)
def downgrade() -> None:
    """Drop the ``users`` table if it exists.

    NOTE(review): the drop is issued even when ``upgrade()`` found the table
    already present and created nothing — confirm that removing a
    pre-existing ``users`` table on downgrade is intended.
    """
    drop_users = text("DROP TABLE IF EXISTS users")
    op.execute(drop_users)
@@ -1,210 +0,0 @@
"""Create domain tables (stores, purchases, coupons, etc.).
Revision ID: 008_create_domain_tables
Revises: 007_bootstrap_users_table
Create Date: 2026-04-04
"""
import sqlalchemy as sa
from sqlalchemy import text
from alembic import op
revision = "008_create_domain_tables"
down_revision = "007_bootstrap_users_table"
branch_labels = None
depends_on = None
def _uuid_pk() -> sa.Column:
    """Build the UUID ``id`` primary-key column shared by every table below."""
    return sa.Column("id", sa.Uuid(), server_default=text("gen_random_uuid()"), primary_key=True)


def _timestamps() -> tuple:
    """Build fresh ``created_at`` / ``updated_at`` columns, both defaulting to now()."""
    return (
        sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
        sa.Column("updated_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
    )


def _money() -> sa.Numeric:
    """Build the NUMERIC(10, 2) type used for every price and amount column."""
    return sa.Numeric(10, 2)


def upgrade() -> None:
    """Create the nine domain tables, skipping any that already exist.

    Tables are created parents-first so each foreign key's target is in place
    before the table that references it: stores, store_locations,
    normalized_products, purchases, purchase_items, coupons, price_history,
    shrinkflation_events, user_store_accounts.
    """
    insp = sa.inspect(op.get_bind())

    # 1. stores
    if not insp.has_table("stores"):
        op.create_table(
            "stores",
            _uuid_pk(),
            sa.Column("name", sa.String(100), nullable=False),
            sa.Column("slug", sa.String(20), nullable=False, unique=True),
            sa.Column("logo_url", sa.String(500), nullable=True),
            sa.Column("website_url", sa.String(500), nullable=True),
            *_timestamps(),
        )

    # 2. store_locations
    if not insp.has_table("store_locations"):
        op.create_table(
            "store_locations",
            _uuid_pk(),
            sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
            sa.Column("address", sa.String(300), nullable=False),
            sa.Column("city", sa.String(100), nullable=False),
            sa.Column("state", sa.String(2), nullable=False),
            sa.Column("zip", sa.String(10), nullable=False),
            sa.Column("lat", sa.Float(), nullable=True),
            sa.Column("lng", sa.Float(), nullable=True),
            *_timestamps(),
        )

    # 3. normalized_products
    if not insp.has_table("normalized_products"):
        op.create_table(
            "normalized_products",
            _uuid_pk(),
            sa.Column("canonical_name", sa.String(300), nullable=False),
            sa.Column("category", sa.String(50), nullable=True),
            sa.Column("subcategory", sa.String(100), nullable=True),
            sa.Column("brand", sa.String(200), nullable=True),
            sa.Column("size", sa.String(50), nullable=True),
            sa.Column("size_unit", sa.String(10), nullable=True),
            sa.Column("upc_variants", sa.JSON(), nullable=True),
            *_timestamps(),
        )

    # 4. purchases
    if not insp.has_table("purchases"):
        op.create_table(
            "purchases",
            _uuid_pk(),
            sa.Column("user_id", sa.Text(), sa.ForeignKey("users.id"), nullable=False),
            sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
            sa.Column("store_location_id", sa.Uuid(), sa.ForeignKey("store_locations.id"), nullable=True),
            sa.Column("receipt_id", sa.String(200), nullable=False),
            sa.Column("purchase_date", sa.Date(), nullable=False),
            sa.Column("total", _money(), nullable=False),
            sa.Column("subtotal", _money(), nullable=True),
            sa.Column("tax", _money(), nullable=True),
            sa.Column("savings_total", _money(), nullable=True),
            sa.Column("source_url", sa.String(500), nullable=True),
            sa.Column("raw_data", sa.JSON(), nullable=True),
            sa.Column("ingested_at", sa.DateTime(timezone=True), server_default=sa.func.now(), nullable=False),
            *_timestamps(),
            sa.UniqueConstraint("user_id", "store_id", "receipt_id", name="uq_purchase_receipt"),
            sa.Index("ix_purchases_user_store", "user_id", "store_id"),
        )

    # 5. purchase_items
    if not insp.has_table("purchase_items"):
        op.create_table(
            "purchase_items",
            _uuid_pk(),
            sa.Column("purchase_id", sa.Uuid(), sa.ForeignKey("purchases.id"), nullable=False),
            sa.Column("product_name_raw", sa.String(300), nullable=False),
            sa.Column("upc", sa.String(20), nullable=True),
            sa.Column("quantity", sa.Numeric(10, 3), nullable=False),
            sa.Column("unit_price", _money(), nullable=False),
            sa.Column("extended_price", _money(), nullable=False),
            sa.Column("regular_price", _money(), nullable=True),
            sa.Column("sale_price", _money(), nullable=True),
            sa.Column("coupon_discount", _money(), nullable=True),
            sa.Column("loyalty_discount", _money(), nullable=True),
            sa.Column("category_raw", sa.String(100), nullable=True),
            sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=True),
            *_timestamps(),
        )

    # 6. coupons
    if not insp.has_table("coupons"):
        op.create_table(
            "coupons",
            _uuid_pk(),
            sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
            sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=True),
            sa.Column("title", sa.String(300), nullable=False),
            sa.Column("description", sa.String(1000), nullable=True),
            sa.Column("discount_type", sa.String(20), nullable=False),
            sa.Column("discount_value", _money(), nullable=True),
            sa.Column("min_purchase", _money(), nullable=True),
            sa.Column("valid_from", sa.Date(), nullable=True),
            sa.Column("valid_to", sa.Date(), nullable=True),
            sa.Column("requires_clip", sa.Boolean(), server_default=text("false"), nullable=False),
            sa.Column("coupon_code", sa.String(100), nullable=True),
            sa.Column("source_url", sa.String(500), nullable=True),
            sa.Column("scraped_at", sa.DateTime(timezone=True), nullable=True),
            *_timestamps(),
        )

    # 7. price_history
    if not insp.has_table("price_history"):
        op.create_table(
            "price_history",
            _uuid_pk(),
            sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=False),
            sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
            sa.Column("observed_date", sa.Date(), nullable=False),
            sa.Column("regular_price", _money(), nullable=False),
            sa.Column("sale_price", _money(), nullable=True),
            sa.Column("loyalty_price", _money(), nullable=True),
            sa.Column("coupon_price", _money(), nullable=True),
            sa.Column("source", sa.String(20), nullable=False),
            sa.Column("purchase_item_id", sa.Uuid(), sa.ForeignKey("purchase_items.id"), nullable=True),
            *_timestamps(),
            sa.Index("ix_price_history_product_store_date", "normalized_product_id", "store_id", "observed_date"),
        )

    # 8. shrinkflation_events
    if not insp.has_table("shrinkflation_events"):
        op.create_table(
            "shrinkflation_events",
            _uuid_pk(),
            sa.Column("normalized_product_id", sa.Uuid(), sa.ForeignKey("normalized_products.id"), nullable=False),
            sa.Column("detected_date", sa.Date(), nullable=False),
            sa.Column("old_size", sa.String(50), nullable=False),
            sa.Column("new_size", sa.String(50), nullable=False),
            sa.Column("old_unit", sa.String(10), nullable=True),
            sa.Column("new_unit", sa.String(10), nullable=True),
            sa.Column("price_at_old_size", _money(), nullable=True),
            sa.Column("price_at_new_size", _money(), nullable=True),
            sa.Column("confidence", sa.Numeric(3, 2), server_default=text("1.00"), nullable=False),
            sa.Column("notes", sa.String(1000), nullable=True),
            *_timestamps(),
        )

    # 9. user_store_accounts
    if not insp.has_table("user_store_accounts"):
        op.create_table(
            "user_store_accounts",
            _uuid_pk(),
            sa.Column("user_id", sa.Text(), sa.ForeignKey("users.id"), nullable=False),
            sa.Column("store_id", sa.Uuid(), sa.ForeignKey("stores.id"), nullable=False),
            sa.Column("session_data", sa.JSON(), nullable=True),
            sa.Column("session_expires_at", sa.DateTime(timezone=True), nullable=True),
            sa.Column("last_sync_at", sa.DateTime(timezone=True), nullable=True),
            sa.Column("status", sa.String(20), server_default=text("'active'"), nullable=False),
            *_timestamps(),
            sa.UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),
        )
def downgrade() -> None:
    """Drop the domain tables that exist, children before parents.

    The order is the exact reverse of the creation order in ``upgrade()`` so
    that a table is removed before any table its foreign keys point at.
    """
    insp = sa.inspect(op.get_bind())

    drop_order = (
        "user_store_accounts",
        "shrinkflation_events",
        "price_history",
        "coupons",
        "purchase_items",
        "purchases",
        "normalized_products",
        "store_locations",
        "stores",
    )
    for table_name in drop_order:
        # Tables that are already gone are skipped rather than raising.
        if insp.has_table(table_name):
            op.drop_table(table_name)
+25 -17
View File
@@ -5,6 +5,9 @@ Sessions are verified by querying the shared sessions table directly.
"""
from datetime import UTC, datetime
from hashlib import sha256
from uuid import UUID
from fastapi import Cookie, Depends, Header, HTTPException, Request, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from sqlalchemy import text
@@ -17,21 +20,25 @@ from cartsnitch_api.database import get_db
# but we support Bearer tokens for service-to-service or mobile clients.
bearer_scheme = HTTPBearer(auto_error=False)
# Better-Auth session cookie name
SESSION_COOKIE_NAME = "better-auth.session_token"
# Secure prefix used by better-auth on HTTPS deployments
SECURE_SESSION_COOKIE_NAME = "__Secure-better-auth.session_token"
# Better-Auth session cookie names.
# Over HTTPS Better-Auth adds the __Secure- prefix automatically.
SESSION_COOKIE_NAMES = [
"__Secure-better-auth.session_token", # HTTPS (deployed)
"better-auth.session_token", # HTTP (local dev)
]
async def _validate_session_token(token: str, db: AsyncSession) -> str:
async def _validate_session_token(token: str, db: AsyncSession) -> UUID:
"""Validate a Better-Auth session token against the sessions table.
Better-Auth stores the raw token in the DB. The cookie/Bearer header
carries the same raw token, so we compare directly.
Returns the user_id (as UUID) if the session is valid and not expired.
Better-Auth v1.5.6+ stores tokens as SHA-256 hashes, so we hash the
incoming raw token before querying.
"""
hashed_token = sha256(token.encode("utf-8")).hexdigest()
result = await db.execute(
text("SELECT user_id, expires_at FROM sessions WHERE token = :token"),
{"token": token},
{"token": hashed_token},
)
row = result.first()
@@ -52,14 +59,14 @@ async def _validate_session_token(token: str, db: AsyncSession) -> str:
detail="Session expired",
)
return str(user_id)
return UUID(str(user_id))
async def get_current_user(
request: Request,
credentials: HTTPAuthorizationCredentials | None = Depends(bearer_scheme),
db: AsyncSession = Depends(get_db),
) -> str:
) -> UUID:
"""Extract and validate the session token from cookie or Authorization header.
Checks in order:
@@ -68,17 +75,18 @@ async def get_current_user(
"""
token: str | None = None
# 1. Check session cookie — prefer __Secure- variant (HTTPS) over plain (HTTP dev)
cookie_token = request.cookies.get(SECURE_SESSION_COOKIE_NAME) or request.cookies.get(SESSION_COOKIE_NAME)
# 1. Check session cookie (try both names for HTTP/HTTPS compatibility)
cookie_token = None
for name in SESSION_COOKIE_NAMES:
cookie_token = request.cookies.get(name)
if cookie_token:
break
if cookie_token:
# Better-Auth cookie format is "token.sessionId" — extract just the token part
token = cookie_token.split(".")[0] if "." in cookie_token else cookie_token
token = cookie_token
# 2. Fall back to Bearer header
if not token and credentials:
# Callers might pass the compound value here too
raw = credentials.credentials
token = raw.split(".")[0] if "." in raw else raw
token = credentials.credentials
if not token:
raise HTTPException(
+5 -6
View File
@@ -5,14 +5,13 @@ the Better-Auth service (auth/). This router provides user profile
endpoints that query our own user data from the shared database.
"""
from uuid import UUID
from fastapi import APIRouter, Depends, HTTPException, status
from pydantic import BaseModel
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
from cartsnitch_api.auth.dependencies import get_current_user
from cartsnitch_api.database import get_db
from cartsnitch_api.models import User
from cartsnitch_api.schemas import (
UpdateUserRequest,
UserResponse,
@@ -24,7 +23,7 @@ router = APIRouter(prefix="/auth", tags=["auth"])
@router.get("/me", response_model=UserResponse)
async def get_me(
user_id: str = Depends(get_current_user),
user_id: UUID = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
@@ -39,7 +38,7 @@ async def get_me(
@router.patch("/me", response_model=UserResponse)
async def update_me(
body: UpdateUserRequest,
user_id: str = Depends(get_current_user),
user_id: UUID = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
@@ -55,7 +54,7 @@ async def update_me(
@router.delete("/me", status_code=status.HTTP_204_NO_CONTENT)
async def delete_me(
user_id: str = Depends(get_current_user),
user_id: UUID = Depends(get_current_user),
db: AsyncSession = Depends(get_db),
):
svc = AuthService(db)
+8 -33
View File
@@ -1,51 +1,26 @@
"""Redis/DragonflyDB caching helpers."""
import redis.asyncio as redis
from cartsnitch_api.config import settings
class CacheClient:
"""Redis/DragonflyDB caching with connection pooling.
"""Stub for Redis/DragonflyDB caching.
Will be used for expensive queries: price trends, product comparisons.
Cache invalidation via Redis pub/sub events from other services.
"""
def __init__(self) -> None:
self._pool: redis.ConnectionPool | None = None
self._client: redis.Redis | None = None
async def initialize(self) -> None:
"""Initialize the Redis connection pool."""
self._pool = redis.ConnectionPool.from_url(
settings.redis_url,
max_connections=20,
decode_responses=True,
)
self._client = redis.Redis(connection_pool=self._pool)
async def close(self) -> None:
"""Close the Redis connection pool."""
if self._client:
await self._client.aclose()
if self._pool:
await self._pool.aclose()
self.url = settings.redis_url
async def get(self, key: str) -> str | None:
if not self._client:
return None
return await self._client.get(key)
# TODO: implement with redis-py async
return None
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
if not self._client:
return
await self._client.set(key, value, ex=ttl_seconds)
# TODO: implement with redis-py async
pass
async def delete(self, key: str) -> None:
if not self._client:
return
await self._client.delete(key)
cache_client = CacheClient()
# TODO: implement with redis-py async
pass
+8 -34
View File
@@ -1,25 +1,23 @@
import base64
from pydantic import AliasChoices, Field, model_validator
from pydantic import model_validator
from pydantic_settings import BaseSettings
class Settings(BaseSettings):
model_config = {"env_prefix": "CARTSNITCH_"}
database_url: str = Field(
default="postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
validation_alias=AliasChoices("CARTSNITCH_DATABASE_URL", "DATABASE_URL"),
)
database_url: str = "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch"
redis_url: str = "redis://localhost:6379/0"
jwt_secret_key: str
jwt_secret_key: str = "change-me-in-production"
jwt_algorithm: str = "HS256"
jwt_access_token_expire_minutes: int = 15
jwt_refresh_token_expire_days: int = 7
service_key: str
fernet_key: str
service_key: str = "change-me-in-production"
# Valid Fernet key for local dev — MUST be overridden in production
fernet_key: str = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
auth_service_url: str = "http://auth:3001"
@@ -34,26 +32,9 @@ class Settings(BaseSettings):
rate_limit_window_seconds: int = 60
rate_limit_enabled: bool = True
_PLACEHOLDER_VALUES = {"change-me-in-production"}
@model_validator(mode="after")
def validate_secrets(self):
if not self.jwt_secret_key or self.jwt_secret_key in self._PLACEHOLDER_VALUES:
raise ValueError(
"CARTSNITCH_JWT_SECRET_KEY must be set to a secure value. "
'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
)
if not self.service_key or self.service_key in self._PLACEHOLDER_VALUES:
raise ValueError(
"CARTSNITCH_SERVICE_KEY must be set to a secure value. "
'Generate one with: python -c "import secrets; print(secrets.token_urlsafe(32))"'
)
if not self.fernet_key or self.fernet_key in self._PLACEHOLDER_VALUES:
raise ValueError(
"CARTSNITCH_FERNET_KEY must be set to a valid Fernet key. "
"Generate one with: python -c "
"'from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())'"
)
def validate_fernet_key(self):
"""Validate fernet_key is a valid 32-byte url-safe base64 key at startup."""
try:
decoded = base64.urlsafe_b64decode(self.fernet_key.encode())
if len(decoded) != 32:
@@ -68,12 +49,5 @@ class Settings(BaseSettings):
) from None
return self
@model_validator(mode="after")
def normalize_database_url(self):
"""Normalize postgresql:// → postgresql+asyncpg:// for the asyncpg driver."""
if self.database_url.startswith("postgresql://"):
self.database_url = self.database_url.replace("postgresql://", "postgresql+asyncpg://", 1)
return self
settings = Settings()
+1 -13
View File
@@ -6,14 +6,7 @@ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_asyn
from cartsnitch_api.config import settings
engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
pool_pre_ping=True,
pool_recycle=3600,
)
engine = create_async_engine(settings.database_url, echo=False)
async_session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
@@ -21,8 +14,3 @@ async def get_db() -> AsyncGenerator[AsyncSession, None]:
"""FastAPI dependency that yields an async DB session."""
async with async_session_factory() as session:
yield session
async def dispose_engine() -> None:
"""Dispose the database engine, closing all pooled connections."""
await engine.dispose()
+2 -7
View File
@@ -5,8 +5,6 @@ from contextlib import asynccontextmanager
from fastapi import APIRouter, FastAPI
from cartsnitch_api.auth.routes import router as auth_router
from cartsnitch_api.cache import cache_client
from cartsnitch_api.database import dispose_engine
from cartsnitch_api.middleware.cors import add_cors_middleware
from cartsnitch_api.middleware.error_handler import add_error_handlers, add_error_monitor_middleware
from cartsnitch_api.middleware.rate_limit import add_rate_limit_middleware
@@ -20,15 +18,13 @@ from cartsnitch_api.routes.purchases import router as purchases_router
from cartsnitch_api.routes.scraping import router as scraping_router
from cartsnitch_api.routes.shopping import router as shopping_router
from cartsnitch_api.routes.stores import router as stores_router
from cartsnitch_api.routes.user import router as user_router
@asynccontextmanager
async def lifespan(app: FastAPI):
await cache_client.initialize()
# TODO: initialize DB session pool, Redis connection, service clients
yield
await cache_client.close()
await dispose_engine()
# TODO: cleanup connections
def create_app() -> FastAPI:
@@ -53,7 +49,6 @@ def create_app() -> FastAPI:
# Data endpoints mounted under /api/v1
v1_router = APIRouter(prefix="/api/v1")
v1_router.include_router(user_router)
v1_router.include_router(stores_router)
v1_router.include_router(purchases_router)
v1_router.include_router(products_router)
+2 -2
View File
@@ -11,6 +11,6 @@ def add_cors_middleware(app: FastAPI) -> None:
CORSMiddleware,
allow_origins=settings.cors_origins,
allow_credentials=True,
allow_methods=["GET", "POST", "PUT", "DELETE", "PATCH", "OPTIONS"],
allow_headers=["Content-Type", "Authorization", "Accept", "Origin", "X-Requested-With"],
allow_methods=["*"],
allow_headers=["*"],
)
@@ -4,7 +4,6 @@ Uses in-memory sliding window as fallback, Redis/DragonflyDB when available.
Per-IP limiting on public endpoints, per-token limiting on authenticated endpoints.
"""
import hashlib
import time
from collections import defaultdict
from threading import Lock
@@ -72,8 +71,8 @@ def _get_rate_limit_key(request: Request) -> tuple[str, _SlidingWindowCounter]:
auth_header = request.headers.get("authorization", "")
if auth_header.startswith("Bearer "):
token = auth_header[7:]
token_hash = hashlib.sha256(token.encode()).hexdigest()
return f"token:{token_hash}", _auth_limiter
# Use last 16 chars of token as key to avoid storing full tokens
return f"token:{token[-16:]}", _auth_limiter
# Fallback to IP for unauthenticated non-public endpoints
return f"ip:{_get_client_ip(request)}", _public_limiter
+1 -1
View File
@@ -33,7 +33,7 @@ class Purchase(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "purchases"
user_id: Mapped[str] = mapped_column(ForeignKey("users.id"), nullable=False)
store_id: Mapped[str] = mapped_column(ForeignKey("stores.id"), nullable=False)
store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
store_location_id: Mapped[uuid.UUID | None] = mapped_column(ForeignKey("store_locations.id"))
receipt_id: Mapped[str] = mapped_column(String(200), nullable=False)
purchase_date: Mapped[date] = mapped_column(Date, nullable=False)
+4 -18
View File
@@ -1,11 +1,10 @@
"""User and UserStoreAccount models."""
import secrets
import uuid
from datetime import datetime
from typing import TYPE_CHECKING
import sqlalchemy as sa
from sqlalchemy import Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy import DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_api.constants import AccountStatus
@@ -24,21 +23,8 @@ class User(TimestampMixin, Base):
id: Mapped[str] = mapped_column(Text, primary_key=True)
email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
hashed_password: Mapped[str | None] = mapped_column(String(255), nullable=True)
hashed_password: Mapped[str] = mapped_column(String(255), nullable=False)
display_name: Mapped[str | None] = mapped_column(String(100))
email_verified: Mapped[bool] = mapped_column(
Boolean, nullable=False, server_default="false"
)
image: Mapped[str | None] = mapped_column(Text, nullable=True)
email_inbound_token: Mapped[str] = mapped_column(
String(22),
nullable=False,
unique=True,
default=lambda: secrets.token_urlsafe(16),
server_default=sa.text(
"replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_')"
),
)
# Relationships
store_accounts: Mapped[list["UserStoreAccount"]] = relationship(back_populates="user")
@@ -52,7 +38,7 @@ class UserStoreAccount(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__table_args__ = (UniqueConstraint("user_id", "store_id", name="uq_user_store_account"),)
user_id: Mapped[str] = mapped_column(ForeignKey("users.id"), nullable=False)
store_id: Mapped[str] = mapped_column(ForeignKey("stores.id"), nullable=False)
store_id: Mapped[uuid.UUID] = mapped_column(ForeignKey("stores.id"), nullable=False)
session_data: Mapped[dict | None] = mapped_column(EncryptedJSON)
session_expires_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
last_sync_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
-32
View File
@@ -1,32 +0,0 @@
"""User routes: per-user account endpoints (email-in address, etc.)."""
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy.ext.asyncio import AsyncSession
from cartsnitch_api.auth.dependencies import get_current_user
from cartsnitch_api.database import get_db
from cartsnitch_api.schemas import EmailInAddressResponse
from cartsnitch_api.services.auth import AuthService
router = APIRouter(tags=["user"])
@router.get("/me/email-in-address", response_model=EmailInAddressResponse)
async def get_email_in_address(
    user_id: str = Depends(get_current_user),
    db: AsyncSession = Depends(get_db),
):
    """Return the authenticated user's receipt-forwarding address.

    The address comes from ``AuthService.get_email_in_address``; the response
    pairs it with fixed forwarding instructions.

    Raises:
        HTTPException: 404 "User not found" when the service raises
            ``LookupError``.
    """
    forwarding_help = (
        "Forward your digital receipt emails to this address. "
        "We currently support Meijer, Kroger, and Target receipt emails."
    )
    service = AuthService(db)
    try:
        address = await service.get_email_in_address(user_id)
        return EmailInAddressResponse(email_address=address, instructions=forwarding_help)
    except LookupError:
        # `from None` hides the internal LookupError from the raised 404.
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND, detail="User not found"
        ) from None
-5
View File
@@ -22,11 +22,6 @@ class UserResponse(BaseModel):
created_at: datetime
class EmailInAddressResponse(BaseModel):
email_address: str
instructions: str
# ---------- Stores ----------
+5 -14
View File
@@ -5,6 +5,8 @@ handled by the Better-Auth service (auth/). This service provides
user lookup and profile update operations for the API gateway.
"""
from uuid import UUID
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession
@@ -13,7 +15,7 @@ class AuthService:
def __init__(self, db: AsyncSession) -> None:
self.db = db
async def get_user(self, user_id: str) -> dict:
async def get_user(self, user_id: UUID) -> dict:
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
@@ -28,7 +30,7 @@ class AuthService:
"created_at": user.created_at,
}
async def update_user(self, user_id: str, **fields) -> dict:
async def update_user(self, user_id: UUID, **fields) -> dict:
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
@@ -56,7 +58,7 @@ class AuthService:
"created_at": user.created_at,
}
async def delete_user(self, user_id: str) -> None:
async def delete_user(self, user_id: UUID) -> None:
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
@@ -66,14 +68,3 @@ class AuthService:
await self.db.delete(user)
await self.db.commit()
async def get_email_in_address(self, user_id: str) -> str:
"""Return the per-user email-in address for receipt forwarding."""
from cartsnitch_api.models import User
result = await self.db.execute(select(User).where(User.id == user_id))
user = result.scalar_one_or_none()
if not user:
raise LookupError("User not found")
return f"receipts+{user.email_inbound_token}@receipts.cartsnitch.com"
+8 -36
View File
@@ -19,25 +19,6 @@ from cartsnitch_api.database import get_db
from cartsnitch_api.main import create_app
from cartsnitch_api.models import Base
TEST_JWT_SECRET = secrets.token_urlsafe(32)
TEST_SERVICE_KEY = secrets.token_urlsafe(32)
TEST_FERNET_KEY = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
@pytest.fixture(autouse=True)
def setup_test_settings():
original_jwt = cartsnitch_settings.jwt_secret_key
original_service = cartsnitch_settings.service_key
original_fernet = cartsnitch_settings.fernet_key
cartsnitch_settings.jwt_secret_key = TEST_JWT_SECRET
cartsnitch_settings.service_key = TEST_SERVICE_KEY
cartsnitch_settings.fernet_key = TEST_FERNET_KEY
yield
cartsnitch_settings.jwt_secret_key = original_jwt
cartsnitch_settings.service_key = original_service
cartsnitch_settings.fernet_key = original_fernet
TEST_DATABASE_URL = "sqlite+aiosqlite:///:memory:"
@@ -79,8 +60,7 @@ async def db_engine():
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
# Create Better-Auth tables (not managed by SQLAlchemy models)
await conn.execute(
text("""
await conn.execute(text("""
CREATE TABLE IF NOT EXISTS sessions (
id TEXT PRIMARY KEY,
token TEXT NOT NULL UNIQUE,
@@ -91,10 +71,8 @@ async def db_engine():
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
)
""")
)
await conn.execute(
text("""
"""))
await conn.execute(text("""
CREATE TABLE IF NOT EXISTS accounts (
id TEXT PRIMARY KEY,
user_id TEXT NOT NULL,
@@ -110,10 +88,8 @@ async def db_engine():
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
)
""")
)
await conn.execute(
text("""
"""))
await conn.execute(text("""
CREATE TABLE IF NOT EXISTS verifications (
id TEXT PRIMARY KEY,
identifier TEXT NOT NULL,
@@ -122,8 +98,7 @@ async def db_engine():
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL
)
""")
)
"""))
yield engine
@@ -158,13 +133,10 @@ async def client(db_engine):
app.dependency_overrides.clear()
async def _create_test_user_and_session(
client: AsyncClient, db_engine, **user_overrides
) -> tuple[dict, str]:
async def _create_test_user_and_session(client: AsyncClient, db_engine, **user_overrides) -> tuple[dict, str]:
"""Create a test user and a valid session directly in the DB.
Returns (user_dict, session_token). Better-Auth stores the raw token
in the DB, so we insert it as-is.
Returns (user_dict, session_token).
"""
user_id = str(uuid.uuid4())
email = user_overrides.get("email", "test@example.com")
@@ -71,56 +71,6 @@ async def test_delete_me(client, auth_headers):
assert resp.status_code == 404
@pytest.mark.asyncio
async def test_get_me_compound_cookie(client, db_engine):
"""Compound cookie value (token.sessionId) must be parsed to extract the token part."""
from tests.conftest import _create_test_user_and_session
_, session_token = await _create_test_user_and_session(
client, db_engine, email="compound@example.com", display_name="Compound User"
)
compound = f"{session_token}.B0atkJCFxK1rZlwWPMK97nVO2LnyDun7"
resp = await client.get(
"/auth/me",
headers={"Cookie": f"better-auth.session_token={compound}"},
)
assert resp.status_code == 200
assert resp.json()["email"] == "compound@example.com"
@pytest.mark.asyncio
async def test_get_me_raw_token_cookie(client, db_engine):
"""Raw token (no dot) in cookie must still work — regression guard."""
from tests.conftest import _create_test_user_and_session
_, session_token = await _create_test_user_and_session(
client, db_engine, email="rawcookie@example.com", display_name="Raw Cookie User"
)
resp = await client.get(
"/auth/me",
headers={"Cookie": f"better-auth.session_token={session_token}"},
)
assert resp.status_code == 200
assert resp.json()["email"] == "rawcookie@example.com"
@pytest.mark.asyncio
async def test_get_me_compound_bearer(client, db_engine):
"""Compound Bearer token (token.sessionId) must be parsed to extract the token part."""
from tests.conftest import _create_test_user_and_session
_, session_token = await _create_test_user_and_session(
client, db_engine, email="compoundbearer@example.com", display_name="Compound Bearer User"
)
compound = f"{session_token}.B0atkJCFxK1rZlwWPMK97nVO2LnyDun7"
resp = await client.get(
"/auth/me",
headers={"Authorization": f"Bearer {compound}"},
)
assert resp.status_code == 200
assert resp.json()["email"] == "compoundbearer@example.com"
@pytest.mark.asyncio
async def test_expired_session_rejected(client, db_engine):
"""Expired sessions must be rejected."""
-48
View File
@@ -1,48 +0,0 @@
"""Tests for Settings config, specifically the database_url env var fallback."""
import os
from cartsnitch_api.config import Settings
def test_database_url_prefers_cartsnitch_prefix():
"""CARTSNITCH_DATABASE_URL takes precedence over DATABASE_URL."""
env = {
"CARTSNITCH_DATABASE_URL": "postgresql+asyncpg://user1:pass1@host1:5432/db1",
"DATABASE_URL": "postgresql://user2:pass2@host2:5432/db2",
}
settings = Settings(**env)
assert settings.database_url == "postgresql+asyncpg://user1:pass1@host1:5432/db1"
def test_database_url_falls_back_to_database_url():
"""When CARTSNITCH_DATABASE_URL is absent, DATABASE_URL is accepted."""
env = {
"DATABASE_URL": "postgresql://user:pass@dbhost:5432/mydb",
}
settings = Settings(**env)
assert settings.database_url == "postgresql+asyncpg://user:pass@dbhost:5432/mydb"
def test_database_url_normalizes_plain_postgresql_prefix():
"""DATABASE_URL with plain postgresql:// is normalized to postgresql+asyncpg://."""
env = {
"DATABASE_URL": "postgresql://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
}
settings = Settings(**env)
assert settings.database_url == "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch"
def test_database_url_preserves_asyncpg_prefix():
"""CARTSNITCH_DATABASE_URL with postgresql+asyncpg:// is left unchanged."""
env = {
"CARTSNITCH_DATABASE_URL": "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch",
}
settings = Settings(**env)
assert settings.database_url == "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch"
def test_database_url_default():
"""When neither env var is set, the hardcoded default is used."""
settings = Settings()
assert settings.database_url == "postgresql+asyncpg://cartsnitch:cartsnitch@localhost:5432/cartsnitch"
@@ -1,61 +0,0 @@
"""Tests for GET /api/v1/me/email-in-address endpoint."""
import pytest
from httpx import AsyncClient
@pytest.mark.asyncio
async def test_get_email_in_address_authenticated(client: AsyncClient, auth_headers: dict):
"""Authenticated user gets their email-in address."""
response = await client.get(
"/api/v1/me/email-in-address",
headers=auth_headers,
)
assert response.status_code == 200
data = response.json()
assert "email_address" in data
assert data["email_address"].startswith("receipts+")
assert data["email_address"].endswith("@receipts.cartsnitch.com")
assert len(data["email_address"]) > len("receipts+@receipts.cartsnitch.com")
assert "instructions" in data
assert "Meijer" in data["instructions"]
assert "Kroger" in data["instructions"]
assert "Target" in data["instructions"]
@pytest.mark.asyncio
async def test_get_email_in_address_unauthenticated(client: AsyncClient):
"""Unauthenticated request returns 401."""
response = await client.get("/api/v1/me/email-in-address")
assert response.status_code == 401
@pytest.mark.asyncio
async def test_get_email_in_address_invalid_token(client: AsyncClient):
"""Invalid JWT token returns 401."""
response = await client.get(
"/api/v1/me/email-in-address",
headers={"Authorization": "Bearer invalid-token-xyz"},
)
assert response.status_code == 401
@pytest.mark.asyncio
async def test_email_address_format(client: AsyncClient, auth_headers: dict):
"""Email address format is receipts+{22-char-urlsafe-token}@receipts.cartsnitch.com."""
response = await client.get(
"/api/v1/me/email-in-address",
headers=auth_headers,
)
assert response.status_code == 200
data = response.json()
email = data["email_address"]
# Format: receipts+<22-char-urlsafe-token>@receipts.cartsnitch.com
assert email.startswith("receipts+")
assert email.endswith("@receipts.cartsnitch.com")
# token_urlsafe(16) produces 22 chars
middle = email[len("receipts+") : -len("@receipts.cartsnitch.com")]
assert len(middle) == 22
assert "@" not in middle
+1 -32
View File
@@ -1,10 +1,8 @@
"""Tests for rate limiting middleware."""
from unittest.mock import MagicMock
import pytest
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter, _get_rate_limit_key
from cartsnitch_api.middleware.rate_limit import _SlidingWindowCounter
class TestSlidingWindowCounter:
@@ -55,32 +53,3 @@ async def test_health_skips_rate_limit(client):
resp = await client.get("/health")
assert resp.status_code == 200
assert "x-ratelimit-limit" not in resp.headers
class TestGetRateLimitKey:
def _make_request(self, auth_header: str = "") -> MagicMock:
req = MagicMock()
req.url.path = "/purchases"
req.headers = {"authorization": auth_header} if auth_header else {}
return req
def test_distinct_tokens_produce_distinct_keys(self):
req1 = self._make_request("Bearer token_alpha_12345")
req2 = self._make_request("Bearer token_beta_67890")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 != key2
def test_same_token_produces_same_key(self):
req1 = self._make_request("Bearer same_token_value_abc")
req2 = self._make_request("Bearer same_token_value_abc")
key1, _ = _get_rate_limit_key(req1)
key2, _ = _get_rate_limit_key(req2)
assert key1 == key2
def test_key_does_not_contain_raw_token_suffix(self):
raw_token = "my_secret_jwt_token_xyz"
req = self._make_request(f"Bearer {raw_token}")
key, _ = _get_rate_limit_key(req)
assert raw_token[-16:] not in key
assert raw_token not in key
+2 -3
View File
@@ -6,14 +6,13 @@ from httpx import ASGITransport, AsyncClient
from cartsnitch_api.main import app
EXPECTED_ROUTES = [
# Auth (7)
# Auth (6)
("post", "/auth/register"),
("post", "/auth/login"),
("post", "/auth/refresh"),
("get", "/auth/me"),
("patch", "/auth/me"),
("delete", "/auth/me"),
("get", "/auth/me/email-in-address"),
# Stores (4)
("get", "/stores"),
("get", "/me/stores"),
@@ -90,4 +89,4 @@ async def test_route_count():
if method in ("get", "post", "put", "delete", "patch"):
count += 1
assert count == 34, f"Expected 34 routes, found {count}"
assert count == 33, f"Expected 33 routes, found {count}"
-1
View File
@@ -95,6 +95,5 @@ export const auth = betterAuth({
"https://cartsnitch.com",
"https://cartsnitch.farh.net",
"https://cartsnitch.dev.farh.net",
"https://cartsnitch.uat.farh.net",
],
});
+1 -1
View File
@@ -14,7 +14,7 @@ if config.config_file_name is not None:
db_url = os.environ.get("CARTSNITCH_DATABASE_URL_SYNC")
if db_url:
config.set_main_option("sqlalchemy.url", db_url.replace("%", "%%"))
config.set_main_option("sqlalchemy.url", db_url)
target_metadata = Base.metadata
@@ -1,37 +0,0 @@
"""Add email_inbound_token to users.
Revision ID: 001_add_email_inbound_token
Revises:
Create Date: 2026-04-02
"""
from collections.abc import Sequence
import sqlalchemy as sa
from alembic import op
revision: str = "001_add_email_inbound_token"
down_revision: str | None = None
branch_labels: str | Sequence[str] | None = None
depends_on: str | Sequence[str] | None = None
def upgrade() -> None:
op.add_column("users", sa.Column("email_inbound_token", sa.String(22), nullable=True))
op.create_unique_constraint("uq_users_email_inbound_token", "users", ["email_inbound_token"])
# Backfill existing users with generated tokens (PostgreSQL)
op.execute(
"UPDATE users SET email_inbound_token = "
"substring(replace(gen_random_uuid()::text, '-', ''), 1, 22) "
"WHERE email_inbound_token IS NULL"
)
# Alter to non-nullable
op.alter_column("users", "email_inbound_token", nullable=False)
def downgrade() -> None:
op.drop_constraint("uq_users_email_inbound_token", "users", type_="unique")
op.drop_column("users", "email_inbound_token")
+1 -11
View File
@@ -1,11 +1,10 @@
"""User and UserStoreAccount models."""
import secrets
import uuid
from datetime import datetime
from typing import TYPE_CHECKING
from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint, text
from sqlalchemy import JSON, Boolean, DateTime, ForeignKey, String, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column, relationship
from cartsnitch_common.constants import AccountStatus
@@ -22,15 +21,6 @@ class User(UUIDPrimaryKeyMixin, TimestampMixin, Base):
__tablename__ = "users"
email: Mapped[str] = mapped_column(String(255), nullable=False, unique=True)
email_inbound_token: Mapped[str] = mapped_column(
String(22),
nullable=False,
unique=True,
default=lambda: secrets.token_urlsafe(16),
server_default=text(
"replace(replace(trim(trailing '=' from encode(gen_random_bytes(16), 'base64')), '+', '-'), '/', '_')"
),
)
hashed_password: Mapped[str | None] = mapped_column(String(255), nullable=True)
display_name: Mapped[str | None] = mapped_column(String(100))
email_verified: Mapped[bool] = mapped_column(Boolean, nullable=False, server_default="false")
@@ -20,7 +20,6 @@ class UserRead(BaseModel):
id: uuid.UUID
email: str
display_name: str | None
email_inbound_token: str
created_at: datetime
updated_at: datetime
-34
View File
@@ -147,40 +147,6 @@ class TestStoreLocationModel:
assert loc.lat == pytest.approx(42.2808)
class TestUserModel:
def test_email_inbound_token_auto_populated(self, session):
user = User(
id=uuid.uuid4(),
email="token_test@example.com",
hashed_password="hashed",
created_at=datetime.now(UTC),
updated_at=datetime.now(UTC),
)
session.add(user)
session.commit()
assert user.email_inbound_token is not None
assert len(user.email_inbound_token) == 22
def test_email_inbound_token_unique(self, session):
user1 = User(
id=uuid.uuid4(),
email="user1@example.com",
hashed_password="hashed",
created_at=datetime.now(UTC),
updated_at=datetime.now(UTC),
)
user2 = User(
id=uuid.uuid4(),
email="user2@example.com",
hashed_password="hashed",
created_at=datetime.now(UTC),
updated_at=datetime.now(UTC),
)
session.add_all([user1, user2])
session.commit()
assert user1.email_inbound_token != user2.email_inbound_token
class TestUserStoreAccountModel:
def test_account_status_enum(self, session):
user = User(
-6
View File
@@ -9,12 +9,6 @@ server {
gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript image/svg+xml;
gzip_min_length 256;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "strict-origin-when-cross-origin" always;
add_header Content-Security-Policy "default-src 'self'; script-src 'self'; style-src 'self' 'unsafe-inline'; img-src 'self' data: https:; font-src 'self'; connect-src 'self' https://*.cartsnitch.com https://*.farh.net; frame-ancestors 'self'" always;
# Health endpoint for K8s probes
location /health {
access_log off;
+650 -548
View File
File diff suppressed because it is too large Load Diff
+1 -4
View File
@@ -50,9 +50,6 @@
"overrides": {
"@rollup/pluginutils": "5.3.0",
"flatted": "^3.4.2",
"serialize-javascript": "7.0.5",
"brace-expansion": ">=1.1.13",
"lodash": ">=4.17.24",
"minimatch": "^10.2.4"
"serialize-javascript": "7.0.5"
}
}
-5
View File
@@ -14,13 +14,11 @@ dependencies = [
"cryptography>=42.0,<44.0",
"fastapi>=0.115,<1.0",
"uvicorn[standard]>=0.30,<1.0",
"beautifulsoup4>=4.12,<5.0",
"redis>=5.0,<6.0",
"pydantic>=2.0,<3.0",
"pydantic-settings>=2.0,<3.0",
"sqlalchemy[asyncio]>=2.0,<3.0",
"asyncpg>=0.29,<1.0",
"resend>=2.0",
]
[project.optional-dependencies]
@@ -29,9 +27,6 @@ dev = [
"pytest-asyncio>=0.23",
"ruff>=0.3",
"pytest-cov>=5.0",
"fakeredis[aioredis]>=2.20",
"httpx>=0.27",
"python-multipart>=0.0.9",
]
[tool.hatch.build.targets.wheel]
@@ -1,65 +1,9 @@
"""Internal API routes for triggering scrapes and checking status."""
import hashlib
import hmac
import re
import time
from fastapi import APIRouter, HTTPException, Request
from receiptwitness.config import settings
from receiptwitness.queue.email import EmailJob, enqueue_email, get_redis
from fastapi import APIRouter
router = APIRouter()
TOKEN_PATTERN = re.compile(r"receipts\+([A-Za-z0-9_-]+)@")
def verify_mailgun_signature(token: str, timestamp: str, signature: str) -> bool:
    """Check the HMAC-SHA256 signature attached to a Mailgun webhook request.

    Args:
        token: The ``token`` form field sent with the webhook.
        timestamp: The ``timestamp`` form field (Unix seconds, as text).
        signature: The hex digest sent in the ``signature`` form field.

    Returns:
        True only when the timestamp is an integer within 300 seconds of the
        current time, a signing key is configured, and ``signature`` equals
        the hex HMAC-SHA256 of ``timestamp + token`` under that key.
    """
    try:
        ts = int(timestamp)
    except (ValueError, TypeError):
        return False
    if abs(time.time() - ts) > 300:  # 5 min freshness
        return False

    signing_key = settings.mailgun_webhook_signing_key
    if not signing_key:
        # Fail closed: with an empty key anyone can compute a matching digest,
        # so an unconfigured deployment must reject every webhook.
        return False

    expected = hmac.new(
        signing_key.encode(), f"{timestamp}{token}".encode(), hashlib.sha256
    ).hexdigest()
    # Compare as bytes: compare_digest raises TypeError for str arguments that
    # contain non-ASCII characters, and the signature is attacker-controlled.
    return hmac.compare_digest(signature.encode(), expected.encode())
@router.post("/inbound/email")
async def receive_inbound_email(request: Request):
    """Accept an inbound-email webhook, validate it, and queue it for processing.

    The request is rejected with HTTP 406 when the signature check fails or
    when the recipient address does not match ``TOKEN_PATTERN``. Otherwise an
    ``EmailJob`` is built from the form fields and pushed onto the queue.

    Returns:
        ``{"status": "queued"}`` once the job has been enqueued.
    """
    form = await request.form()

    def text_field(name: str) -> str:
        # Missing fields collapse to "" so downstream code always sees a str.
        return str(form.get(name, ""))

    def optional_text(name: str) -> str | None:
        # Body parts stay None when absent instead of becoming "None".
        raw = form.get(name)
        return None if raw is None else str(raw)

    # Step 1: authenticate the webhook.
    signed_at = text_field("timestamp")
    if not verify_mailgun_signature(text_field("token"), signed_at, text_field("signature")):
        raise HTTPException(status_code=406, detail="Invalid signature")

    # Step 2: pull the per-account token out of the recipient address.
    recipient = text_field("recipient")
    token_match = TOKEN_PATTERN.search(recipient)
    if token_match is None:
        raise HTTPException(status_code=406, detail="Invalid recipient")

    # Step 3: enqueue; the worker later resolves the token to a user id.
    job = EmailJob(
        user_id=token_match.group(1),
        sender=text_field("sender"),
        recipient=recipient,
        subject=text_field("subject"),
        body_html=optional_text("body-html"),
        body_plain=optional_text("body-plain"),
        received_at=signed_at,
        message_id=text_field("Message-Id"),
    )
    redis_client = await get_redis()
    await enqueue_email(redis_client, job)
    return {"status": "queued"}
@router.get("/health")
async def health():
+1 -42
View File
@@ -1,12 +1,8 @@
"""Service-specific configuration for ReceiptWitness."""
from pydantic import model_validator
from pydantic_settings import BaseSettings
_PLACEHOLDER_VALUES = {"change-me-in-production"}
class ReceiptWitnessSettings(BaseSettings):
model_config = {"env_prefix": "RW_"}
@@ -26,42 +22,5 @@ class ReceiptWitnessSettings(BaseSettings):
headless: bool = True
browser_timeout_ms: int = 60000
# Email notifications (Resend)
resend_api_key: str = ""
notification_email_from: str = "notifications@cartsnitch.com"
notifications_enabled: bool = False
# Mailgun inbound email webhook
mailgun_webhook_signing_key: str = ""
@model_validator(mode="after")
def validate_required_vars(self):
errors = []
if not self.session_encryption_key or self.session_encryption_key in _PLACEHOLDER_VALUES:
errors.append(
"RW_SESSION_ENCRYPTION_KEY must be set to a secure value. "
'Generate one with: python -c "from cryptography.fernet import Fernet; print(Fernet.generate_key().decode())"'
)
if self.notifications_enabled and not self.resend_api_key:
errors.append(
"RW_RESEND_API_KEY must be set when RW_NOTIFICATIONS_ENABLED=true. "
"Get an API key from https://resend.com/api-keys"
)
if errors:
raise ValueError(
"ReceiptWitness startup failed — missing required config:\n"
+ "\n".join(f" - {e}" for e in errors)
)
return self
class _LazySettings:
_instance: ReceiptWitnessSettings | None = None
def __getattr__(self, name: str):
if _LazySettings._instance is None:
_LazySettings._instance = ReceiptWitnessSettings()
return getattr(_LazySettings._instance, name)
settings = _LazySettings()
settings = ReceiptWitnessSettings()
+8 -46
View File
@@ -2,17 +2,12 @@
import json
import logging
import uuid
from datetime import UTC, datetime
from decimal import Decimal
import redis.asyncio as aioredis
from cartsnitch_common.database import get_async_session_factory
from cartsnitch_common.models.user import User
from sqlalchemy import select
from receiptwitness.config import settings
from receiptwitness.notifications.email import send_receipt_notification
logger = logging.getLogger(__name__)
@@ -44,36 +39,6 @@ async def get_redis_client() -> aioredis.Redis:
return aioredis.Redis(connection_pool=_get_pool())
async def _send_notification_for_event(payload: dict) -> None:
    """Look up the user's email and send a receipt notification.

    This is best-effort: a malformed payload, an unknown user, or a failed
    database lookup is logged and the function returns without raising.

    Args:
        payload: Event payload; must carry ``user_id``, ``store_slug``,
            ``item_count``, ``total`` and ``purchase_date``.
    """
    try:
        user_uuid = uuid.UUID(payload["user_id"])
    except (ValueError, KeyError, TypeError, AttributeError):
        # uuid.UUID raises TypeError for None and AttributeError for other
        # non-string values, not only ValueError for badly formed text.
        logger.warning("Invalid user_id in event payload: %s", payload.get("user_id"))
        return

    # Validate the remaining fields up front so an incomplete payload is
    # skipped instead of raising KeyError after the database round trip.
    try:
        store_name = payload["store_slug"]
        item_count = payload["item_count"]
        total = payload["total"]
        purchase_date = payload["purchase_date"]
    except KeyError as missing:
        logger.warning("Event payload missing %s; skipping notification", missing)
        return

    try:
        session_factory = get_async_session_factory(settings.database_url)
        async with session_factory() as session:
            result = await session.execute(select(User.email).where(User.id == user_uuid))
            user_email = result.scalar_one_or_none()
            if not user_email:
                logger.warning("User %s not found for notification", user_uuid)
                return
    except Exception:
        logger.exception("Failed to look up user email for notification")
        return

    await send_receipt_notification(
        user_email=user_email,
        store_name=store_name,
        item_count=item_count,
        total=total,
        purchase_date=purchase_date,
    )
async def publish_receipt_ingested(
user_id: str,
store_slug: str,
@@ -83,19 +48,18 @@ async def publish_receipt_ingested(
total: Decimal | float,
) -> None:
"""Publish a cartsnitch.receipts.ingested event after successful ingestion."""
payload = {
"user_id": user_id,
"store_slug": store_slug,
"purchase_id": purchase_id,
"purchase_date": purchase_date,
"item_count": item_count,
"total": float(total) if isinstance(total, Decimal) else total,
}
event = {
"event_type": CHANNEL_RECEIPTS_INGESTED,
"timestamp": datetime.now(UTC).isoformat(),
"service": "receiptwitness",
"payload": payload,
"payload": {
"user_id": user_id,
"store_slug": store_slug,
"purchase_id": purchase_id,
"purchase_date": purchase_date,
"item_count": item_count,
"total": float(total) if isinstance(total, Decimal) else total,
},
}
try:
@@ -109,5 +73,3 @@ async def publish_receipt_ingested(
except aioredis.ConnectionError:
logger.error("Failed to publish event — Redis/DragonflyDB connection error")
raise
else:
await _send_notification_for_event(payload)
@@ -1,45 +0,0 @@
"""Email notifications via Resend."""
import asyncio
import html
import logging
import resend
from receiptwitness.config import settings
logger = logging.getLogger(__name__)
async def send_receipt_notification(
    user_email: str,
    store_name: str,
    item_count: int,
    total: float,
    purchase_date: str,
) -> None:
    """Email the user a confirmation that their receipt was ingested.

    Nothing is sent unless notifications are enabled and an API key is set.
    Any failure while building or sending the message is logged, not raised.

    Args:
        user_email: Destination address.
        store_name: Store label shown in the subject and body.
        item_count: Number of line items on the receipt.
        total: Receipt total, rendered with two decimals.
        purchase_date: Purchase date text shown in the body.
    """
    sending_enabled = settings.notifications_enabled and settings.resend_api_key
    if not sending_enabled:
        logger.debug("Notifications disabled — skipping email send")
        return

    resend.api_key = settings.resend_api_key
    # Values interpolated into the HTML body are escaped; the subject is plain text.
    escaped_store = html.escape(store_name)
    escaped_date = html.escape(purchase_date)

    try:
        message = {
            "from": settings.notification_email_from,
            "to": [user_email],
            "subject": f"Receipt processed: {store_name} - ${total:.2f}",
            "html": (
                f"<p>Your receipt from <strong>{escaped_store}</strong> on "
                f"{escaped_date} has been processed.</p>"
                f"<p>{item_count} items, total: ${total:.2f}</p>"
            ),
        }
        # The send call is synchronous, so it is run in a worker thread.
        await asyncio.to_thread(resend.Emails.send, message)
        logger.info("Receipt notification sent to %s", user_email)
    except Exception:
        logger.exception("Failed to send receipt notification to %s", user_email)
@@ -1 +0,0 @@
"""Email receipt parsers for retailer email receipts."""
@@ -1,32 +0,0 @@
"""Base interface for email receipt parsers."""
from abc import ABC, abstractmethod
from dataclasses import dataclass, field
@dataclass
class EmailReceipt:
"""Raw email data before parsing."""
sender: str
recipient: str
subject: str
body_html: str | None = None
body_plain: str | None = None
received_at: str | None = None
raw_headers: dict = field(default_factory=dict)
class BaseEmailParser(ABC):
    """Abstract contract that each retailer email parser implements."""

    @abstractmethod
    def can_parse(self, email: EmailReceipt) -> bool:
        """Report whether this parser is able to handle ``email``.

        Returns:
            True if the parser handles the given email, otherwise False.
        """

    @abstractmethod
    def parse(self, email: EmailReceipt) -> dict:
        """Convert ``email`` into a purchase dictionary.

        Returns:
            A dict whose fields match the PurchaseCreate schema, including an
            ``items`` list whose entries match PurchaseItemCreate fields.
        """
@@ -1,25 +0,0 @@
"""Detect which retailer sent a receipt email."""
import re
from receiptwitness.parsers.email.base import EmailReceipt
RETAILER_PATTERNS: dict[str, list[str]] = {
"meijer": [r"@meijer\.com$", r"@email\.meijer\.com$"],
"kroger": [r"@kroger\.com$", r"@email\.kroger\.com$"],
"target": [r"@target\.com$", r"@email\.target\.com$"],
}
def detect_retailer(email: EmailReceipt) -> str | None:
    """Identify the retailer from the sender address of ``email``.

    The sender is lower-cased and trimmed; when it has the form
    ``Name <address>`` only the bracketed address is examined. Retailers are
    checked in ``RETAILER_PATTERNS`` order and the first one with a matching
    pattern wins.

    Returns:
        The retailer slug, or None when no pattern matches.
    """
    address = email.sender.lower().strip()
    bracketed = re.search(r"<([^>]+)>", address)
    if bracketed is not None:
        address = bracketed.group(1)

    return next(
        (
            slug
            for slug, sender_patterns in RETAILER_PATTERNS.items()
            if any(re.search(candidate, address) for candidate in sender_patterns)
        ),
        None,
    )
@@ -1,157 +0,0 @@
"""Kroger email receipt parser."""
import logging
import re
from datetime import datetime
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
logger = logging.getLogger(__name__)
def _to_decimal(value: str | float | int | None, default: Decimal = Decimal("0")) -> Decimal:
"""Safely convert a value to Decimal."""
if value is None:
return default
try:
return Decimal(str(value).replace("$", "").replace(",", "").strip())
except (InvalidOperation, ValueError):
return default
def _extract_total(body: str) -> Decimal:
"""Extract the transaction total from email body."""
patterns = [
r"Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Amount[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Grand\s+Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
]
for pattern in patterns:
match = re.search(pattern, body, re.IGNORECASE)
if match:
return _to_decimal(match.group(1))
return Decimal("0")
def _extract_receipt_id(body: str) -> str | None:
"""Extract receipt ID / transaction ID from HTML body.
Strips HTML tags first so that whitespace between delimiters and values
(e.g. from ``</strong> KR-2026-0315-4829`` -> `` KR-2026-0315-4829``)
is normalized and the pattern can match cleanly.
"""
stripped = re.sub(r"<[^>]+>", "", body)
patterns = [
r"Receipt\s*#[:\s]*([A-Z0-9-]+)",
r"Transaction\s*#[:\s]*([A-Z0-9-]+)",
r"Order\s*#[:\s]*([A-Z0-9-]+)",
r"Confirmation\s*#[:\s]*([A-Z0-9-]+)",
]
for pattern in patterns:
match = re.search(pattern, stripped, re.IGNORECASE)
if match:
return match.group(1)
return None
def _extract_date(body: str) -> str:
"""Extract purchase date from email body. Returns ISO date string or empty string."""
patterns = [
r"(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})",
r"([A-Z][a-z]{2}\s+\d{1,2},?\s+\d{4})",
]
for pattern in patterns:
match = re.search(pattern, body)
if match:
raw = match.group(1)
try:
dt = datetime.strptime(raw.replace(",", ""), "%b %d %Y")
return dt.strftime("%Y-%m-%d")
except ValueError:
pass
try:
for fmt in ("%m/%d/%Y", "%m/%d/%y", "%d/%m/%Y", "%d/%m/%y"):
try:
dt = datetime.strptime(raw, fmt)
return dt.strftime("%Y-%m-%d")
except ValueError:
continue
except Exception:
pass
return ""
def _extract_items_soup(body: str) -> list[dict]:
    """Extract line items from an HTML email body using BeautifulSoup.

    The body is flattened to text, one node per line. A line is treated as an
    item when it ends in a ``$d.dd`` price, does not start with one of the
    known non-item prefixes, has a name longer than two characters and a
    positive price. Quantity is always 1, so unit and extended price are equal.

    Extraction is best-effort: any error is logged and whatever was parsed up
    to that point is returned.

    Returns:
        At most 20 dicts with ``product_name_raw``, ``quantity``,
        ``unit_price`` and ``extended_price``.
    """
    # Lines starting with these labels are totals, headers or boilerplate.
    skip_prefixes = (
        "Subtotal",
        "Tax",
        "Total",
        "Kroger",
        "Target",
        "Date",
        "Receipt",
        "Order",
        "Transaction",
        "Confirmation",
        "Thank",
        "Questions",
        "Keep",
    )
    # Matches lines like "Product Name $9.99".
    line_item = re.compile(r"(.+?)\s+\$([0-9]+\.[0-9]{2})\s*$")

    items: list[dict] = []
    try:
        text = BeautifulSoup(body, "html.parser").get_text(separator="\n", strip=True)
        for line in text.split("\n"):
            line = line.strip()
            if not line or line.startswith(skip_prefixes):
                continue
            match = line_item.match(line)
            if not match:
                continue
            name = match.group(1).strip()
            price = _to_decimal(match.group(2))
            if len(name) > 2 and price > 0:
                items.append(
                    {
                        "product_name_raw": name,
                        "quantity": Decimal("1"),
                        "unit_price": price,
                        "extended_price": price,
                    }
                )
    except Exception:
        # Still best-effort, but leave a trace instead of hiding the failure.
        logger.exception("Failed to extract Kroger line items from email body")
    # NOTE(review): receipts with more than 20 items are truncated here —
    # confirm this cap is intentional.
    return items[:20]
class KrogerEmailParser(BaseEmailParser):
    """Parser for Kroger digital receipt emails."""

    # Substrings that mark an email as Kroger's.
    # NOTE(review): "plus" looks broad enough to match unrelated emails, and
    # "kroger.com" is already covered by "kroger" — confirm the list.
    KROGER_KEYWORDS = ("kroger", "kroger.com", "plus")

    def can_parse(self, email: EmailReceipt) -> bool:
        """Return True when any keyword occurs in the sender or the body.

        The HTML body is used when present, otherwise the plain body; the
        comparison is case-insensitive.
        """
        sender_text = (email.sender or "").lower()
        body_text = (email.body_html or email.body_plain or "").lower()
        for keyword in self.KROGER_KEYWORDS:
            if keyword in sender_text or keyword in body_text:
                return True
        return False

    def parse(self, email: EmailReceipt) -> dict:
        """Parse the email body into receipt id, purchase date, total and items.

        The HTML body is preferred over the plain body. A missing receipt id
        is reported as an empty string.
        """
        content = (email.body_html or email.body_plain or "").strip()
        return {
            "receipt_id": _extract_receipt_id(content) or "",
            "purchase_date": _extract_date(content),
            "total": _extract_total(content),
            "items": _extract_items_soup(content),
        }
@@ -1,259 +0,0 @@
"""Parse Meijer digital receipt emails into structured purchase data."""
import re
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from bs4.element import Tag
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
def _to_decimal(value, default: str = "0") -> Decimal:
"""Safely convert a value to Decimal."""
if value is None:
return Decimal(default)
try:
return Decimal(str(value).replace("$", "").replace(",", "").strip())
except (InvalidOperation, ValueError, TypeError):
return Decimal(default)
def _extract_receipt_id(soup: BeautifulSoup, subject: str | None) -> str | None:
    """Extract the ``TXN-dddd-dddd-d…`` transaction id from subject or body.

    The subject is checked first; the body text is only read when the subject
    is missing or has no id.

    Args:
        soup: Parsed email body.
        subject: Email subject line, if any.

    Returns:
        The id with every separator normalised to ``-``, or None.
    """
    txn_pattern = r"TXN[-\s]\d{4}[-\s]\d{4}[-\s]\d+"

    match = re.search(txn_pattern, subject) if subject else None
    if match is None:
        # Fallback: look in body
        match = re.search(txn_pattern, soup.get_text())
    if match is None:
        return None
    # The pattern accepts any whitespace separator (tab, newline, …), so all
    # of them are normalised, not just the ASCII space.
    return re.sub(r"\s", "-", match.group(0))
def _extract_purchase_date(soup: BeautifulSoup, subject: str | None) -> str | None:
    """Extract the purchase date as ``YYYY-MM-DD`` from the body or subject.

    The body text is searched first; the subject is consulted only when the
    body yields nothing. Within a text the formats are tried in this order:
    ISO (``2026-03-15``), written (``March 15, 2026``), then ``MM/DD/YYYY``.

    Args:
        soup: Parsed email body.
        subject: Email subject line, if any.

    Returns:
        The date string, or None when no supported date is found.
    """
    month_names = (
        "january",
        "february",
        "march",
        "april",
        "may",
        "june",
        "july",
        "august",
        "september",
        "october",
        "november",
        "december",
    )
    month_numbers = {name: f"{index:02d}" for index, name in enumerate(month_names, start=1)}

    def _scan(text: str) -> str | None:
        # Try ISO format first: YYYY-MM-DD
        iso = re.search(r"(\d{4})-(\d{2})-(\d{2})", text)
        if iso:
            return "-".join(iso.groups())

        # Written format: walk every "<word> <day>, <year>" candidate so an
        # earlier non-month word (e.g. "Store 12, 2026") does not hide a real
        # date later in the text.
        for written in re.finditer(r"([A-Za-z]+)\s+(\d{1,2}),?\s+(\d{4})", text):
            month = month_numbers.get(written.group(1).lower())
            if month:
                return f"{written.group(3)}-{month}-{written.group(2).zfill(2)}"

        # MM/DD/YYYY
        numeric = re.search(r"(\d{1,2})/(\d{1,2})/(\d{4})", text)
        if numeric:
            return f"{numeric.group(3)}-{numeric.group(1).zfill(2)}-{numeric.group(2).zfill(2)}"
        return None

    for text in (soup.get_text(), subject or ""):
        found = _scan(text)
        if found:
            return found
    return None
def _extract_store_info(soup: BeautifulSoup) -> dict:
    """Extract the Meijer store number from the email body text.

    Returns:
        ``{"store_number": "<digits>"}`` when a "Meijer Store #N" style
        reference is found (case-insensitive, ``#`` optional), else ``{}``.
    """
    found = re.search(r"Meijer\s+Store\s+#?(\d+)", soup.get_text(), re.IGNORECASE)
    return {"store_number": found.group(1)} if found else {}
def _extract_items(table: Tag | None) -> list[dict]:
"""Extract line items from the items table."""
items: list[dict] = []
if not table:
return items
rows = table.find_all("tr")
for row in rows:
cells = row.find_all("td")
if len(cells) < 3:
continue
name_cell = cells[0].get_text(strip=True)
qty_cell = cells[1].get_text(strip=True)
price_cell = cells[2].get_text(strip=True)
if not name_cell or name_cell.lower() in ("item", "description"):
continue
# Skip subtotal/tax/total/savings rows
if any(
label in name_cell.lower()
for label in ("subtotal", "tax", "total", "savings", "grand total")
):
continue
try:
quantity = Decimal(qty_cell)
except (InvalidOperation, ValueError, TypeError):
quantity = Decimal("1")
price_str = price_cell.replace("$", "").replace(",", "").strip()
try:
unit_price = Decimal(price_str)
except (InvalidOperation, ValueError, TypeError):
unit_price = Decimal("0")
extended_price = unit_price # Default to unit price; no qty column in fixture
items.append(
{
"product_name_raw": name_cell,
"quantity": quantity,
"unit_price": unit_price,
"extended_price": extended_price,
}
)
return items
def _extract_totals_plain(text: str) -> dict:
"""Extract totals from plain text (no HTML)."""
totals: dict = {
"subtotal": None,
"tax": None,
"total": None,
"savings_total": None,
}
match = re.search(r"\bSubtotal\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if match:
totals["subtotal"] = _to_decimal(match.group(1))
match = re.search(r"\bTax\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if match:
totals["tax"] = _to_decimal(match.group(1))
grand_total_match = re.search(r"Grand\s+Total\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if grand_total_match:
totals["total"] = _to_decimal(grand_total_match.group(1))
savings_match = re.search(r"\bSavings\b[:\s$\-]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if savings_match:
totals["savings_total"] = _to_decimal(savings_match.group(1))
if totals["total"] is None:
total_match = re.search(r"\bTotal\b[:\s$]*([0-9,]+\.?\d*)", text, re.IGNORECASE)
if total_match:
totals["total"] = _to_decimal(total_match.group(1))
return totals
def _extract_totals(soup: BeautifulSoup) -> dict:
    """Extract subtotal, tax, total, and savings from an HTML receipt.

    The label patterns are identical for HTML and plain-text receipts, so this
    renders the document to text and delegates to ``_extract_totals_plain``
    instead of keeping a second copy of every regex.

    Args:
        soup: Parsed email body; only ``get_text()`` is called on it.

    Returns:
        Dict with keys ``subtotal``, ``tax``, ``total`` and ``savings_total``;
        a value is ``None`` when its label was not found.
    """
    return _extract_totals_plain(soup.get_text())
class MeijerEmailParser(BaseEmailParser):
    """Parse Meijer digital receipt emails forwarded by users."""

    def can_parse(self, email: EmailReceipt) -> bool:
        """Return True when the sender's address contains ``meijer``.

        A missing sender is treated as an empty string rather than raising
        ``AttributeError``.
        """
        sender = (email.sender or "").lower().strip()
        # Extract email from "Name <email>" format
        bracketed = re.search(r"<([^>]+)>", sender)
        if bracketed:
            sender = bracketed.group(1)
        return "meijer" in sender

    def parse(self, email: EmailReceipt) -> dict:
        """Parse a Meijer receipt email into a flat result dict.

        The HTML body is preferred; the plain-text body is used when no HTML
        is present.

        Returns:
            Dict with ``receipt_id`` and ``purchase_date`` (``""`` when not
            found), ``total`` (``Decimal("0")`` when not found), ``subtotal``,
            ``tax`` and ``savings_total`` (``None`` when not found), and
            ``items`` (list of line-item dicts).
        """
        body_html = email.body_html
        body_plain = email.body_plain or ""
        soup = BeautifulSoup(body_html or body_plain, "html.parser")

        receipt_id = _extract_receipt_id(soup, email.subject)
        purchase_date = _extract_purchase_date(soup, email.subject)
        # NOTE(review): store info is extracted but not included in the result.
        _ = _extract_store_info(soup)

        # Find the items table — look for one with Item/Qty/Price headers
        table = None
        for tbl in soup.find_all("table"):
            header_texts = [th.get_text(strip=True).lower() for th in tbl.find_all("th")]
            if any("item" in h or "qty" in h or "price" in h for h in header_texts):
                table = tbl
                break
        items = _extract_items(table)

        # Extract totals from HTML; fall back to plain text if no HTML
        if body_html:
            totals = _extract_totals(soup)
        else:
            totals = _extract_totals_plain(body_plain)

        return {
            "receipt_id": receipt_id or "",
            "purchase_date": purchase_date or "",
            "total": totals["total"] or Decimal("0"),
            "subtotal": totals["subtotal"],
            "tax": totals["tax"],
            "savings_total": totals["savings_total"],
            "items": items,
        }
@@ -1,156 +0,0 @@
"""Target email receipt parser."""
import logging
import re
from datetime import datetime
from decimal import Decimal, InvalidOperation
from bs4 import BeautifulSoup
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
logger = logging.getLogger(__name__)
def _to_decimal(value: str | float | int | None, default: Decimal = Decimal("0")) -> Decimal:
"""Safely convert a value to Decimal."""
if value is None:
return default
try:
return Decimal(str(value).replace("$", "").replace(",", "").strip())
except (InvalidOperation, ValueError):
return default
def _extract_total(body: str) -> Decimal:
"""Extract the transaction total from email body."""
patterns = [
r"Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Amount[:\s]*\$?([0-9,]+\.[0-9]{2})",
r"Grand\s+Total[:\s]*\$?([0-9,]+\.[0-9]{2})",
]
for pattern in patterns:
match = re.search(pattern, body, re.IGNORECASE)
if match:
return _to_decimal(match.group(1))
return Decimal("0")
def _extract_receipt_id(body: str) -> str | None:
"""Extract receipt ID / transaction ID from HTML body.
Strips HTML tags first so that whitespace between delimiters and values
(e.g. from ``</strong> TGT-2026-0318-9124`` -> `` TGT-2026-0318-9124``)
is normalized and the pattern can match cleanly.
"""
stripped = re.sub(r"<[^>]+>", "", body)
patterns = [
r"Receipt\s*#[:\s]*([A-Z0-9-]+)",
r"Order\s*#[:\s]*([A-Z0-9-]+)",
r"Confirmation\s*#[:\s]*([A-Z0-9-]+)",
r"Target\s+Order\s*#[:\s]*([A-Z0-9-]+)",
]
for pattern in patterns:
match = re.search(pattern, stripped, re.IGNORECASE)
if match:
return match.group(1)
return None
def _extract_date(body: str) -> str:
"""Extract purchase date from email body. Returns ISO date string or empty string."""
patterns = [
r"(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})",
r"([A-Z][a-z]{2}\s+\d{1,2},?\s+\d{4})",
]
for pattern in patterns:
match = re.search(pattern, body)
if match:
raw = match.group(1)
try:
dt = datetime.strptime(raw.replace(",", ""), "%b %d %Y")
return dt.strftime("%Y-%m-%d")
except ValueError:
pass
try:
for fmt in ("%m/%d/%Y", "%m/%d/%y", "%d/%m/%Y", "%d/%m/%y"):
try:
dt = datetime.strptime(raw, fmt)
return dt.strftime("%Y-%m-%d")
except ValueError:
continue
except Exception:
pass
return ""
def _extract_items_soup(body: str) -> list[dict]:
"""Extract line items from HTML email body using BeautifulSoup."""
items = []
try:
soup = BeautifulSoup(body, "html.parser")
text = soup.get_text(separator="\n", strip=True)
for line in text.split("\n"):
line = line.strip()
if not line or line.startswith(
(
"Subtotal",
"Tax",
"Total",
"Target",
"Kroger",
"Date",
"Receipt",
"Order",
"Transaction",
"Confirmation",
"Thank",
"Questions",
"Keep",
"Receipt",
"Store",
)
):
continue
# Match lines like "Product Name $9.99"
match = re.match(r"(.+?)\s+\$([0-9]+\.[0-9]{2})\s*$", line)
if match:
name = match.group(1).strip()
price = _to_decimal(match.group(2))
if len(name) > 2 and price > 0:
items.append(
{
"product_name_raw": name,
"quantity": Decimal("1"),
"unit_price": price,
"extended_price": price,
}
)
except Exception:
pass
return items[:20]
class TargetEmailParser(BaseEmailParser):
    """Parse Target email receipts (Circle order confirmations)."""

    # Substrings that mark an email as Target's; checked in sender and body.
    TARGET_KEYWORDS = ("target.com", "targetnow", "circle", "target")

    def can_parse(self, email: EmailReceipt) -> bool:
        """Return True when any Target keyword appears in the sender or body."""
        sender = (email.sender or "").lower()
        body = (email.body_html or email.body_plain or "").lower()
        for keyword in self.TARGET_KEYWORDS:
            if keyword in sender or keyword in body:
                return True
        return False

    def parse(self, email: EmailReceipt) -> dict:
        """Parse the email into ``receipt_id``, ``purchase_date``, ``total`` and ``items``.

        The HTML body is preferred over the plain-text one. ``receipt_id`` is
        ``""`` when no ID is found.
        """
        body = (email.body_html or email.body_plain or "").strip()
        total = _extract_total(body)
        return {
            "receipt_id": _extract_receipt_id(body) or "",
            "purchase_date": _extract_date(body),
            "total": total,
            "items": _extract_items_soup(body),
        }
@@ -1 +0,0 @@
"""DragonflyDB Streams queue for email receipt processing."""
@@ -1,77 +0,0 @@
"""DragonflyDB Streams queue for email receipt processing."""
from __future__ import annotations
import json
import logging
from dataclasses import asdict, dataclass
from typing import cast
import redis.asyncio as aioredis
from receiptwitness.config import settings
logger = logging.getLogger(__name__)
STREAM_KEY = "email:receipts"
CONSUMER_GROUP = "email-workers"
@dataclass
class EmailJob:
    """Payload for an email receipt processing job.

    Instances are serialized with ``dataclasses.asdict`` + ``json.dumps`` when
    enqueued and rebuilt from the decoded JSON when consumed, so every field
    must stay JSON-serializable.
    """

    # NOTE(review): the worker passes this value to resolve_user() as an
    # inbound token rather than a resolved user ID — confirm the naming.
    user_id: str
    sender: str
    recipient: str
    subject: str
    body_html: str | None
    body_plain: str | None
    received_at: str
    message_id: str  # from email provider, for dedup
async def get_redis() -> aioredis.Redis:
    """Build an async Redis/DragonflyDB client for ``settings.redis_url``.

    Responses are decoded to ``str`` (``decode_responses=True``).
    """
    client = aioredis.from_url(settings.redis_url, decode_responses=True)
    return cast(aioredis.Redis, client)
async def ensure_consumer_group(client: aioredis.Redis) -> None:
    """Create the consumer group (and the stream itself) if missing.

    A ``BUSYGROUP`` error means the group already exists and is ignored; any
    other ``ResponseError`` is re-raised.
    """
    try:
        await client.xgroup_create(STREAM_KEY, CONSUMER_GROUP, id="0", mkstream=True)
    except aioredis.ResponseError as exc:
        already_exists = "BUSYGROUP" in str(exc)
        if not already_exists:
            raise
async def enqueue_email(client: aioredis.Redis, job: EmailJob) -> str:
    """Append ``job`` to the stream as a single JSON ``data`` field.

    Returns:
        The stream message ID assigned by the server.
    """
    serialized = json.dumps(asdict(job))
    payload: dict[str, str | bytes | int | float] = {"data": serialized}
    # redis-py StreamCommands.xadd expects broader FieldT union; runtime behavior is correct
    msg_id: str = cast(str, await client.xadd(STREAM_KEY, payload))  # type: ignore[arg-type]
    logger.info("Enqueued email job %s for user %s", msg_id, job.user_id)
    return msg_id
async def consume_emails(
    client: aioredis.Redis,
    consumer_name: str,
    count: int = 1,
    block_ms: int = 5000,
) -> list[tuple[str, EmailJob]]:
    """Read new messages from the stream for this consumer group.

    The stream is read with the special ID ``>``, i.e. only messages not yet
    delivered to any consumer in the group. Entries delivered earlier but
    never acknowledged are not re-read by this function.

    A malformed entry (missing ``data`` field, invalid JSON, unexpected job
    fields) is logged and skipped so it cannot discard the rest of the batch.
    It is left unacknowledged.

    Args:
        client: Async Redis client.
        consumer_name: Name of this consumer within the group.
        count: Maximum number of entries to read.
        block_ms: How long to block waiting for entries, in milliseconds.

    Returns:
        List of ``(msg_id, EmailJob)`` pairs; empty when nothing arrived.
    """
    await ensure_consumer_group(client)
    messages = await client.xreadgroup(
        CONSUMER_GROUP, consumer_name, {STREAM_KEY: ">"}, count=count, block=block_ms
    )
    results: list[tuple[str, EmailJob]] = []
    for _stream, entries in messages or []:
        for msg_id, fields in entries:
            try:
                job = EmailJob(**json.loads(fields["data"]))
            except (KeyError, TypeError, ValueError):
                # json.JSONDecodeError is a ValueError; TypeError covers
                # payloads whose keys do not match the EmailJob fields.
                logger.exception("Skipping malformed email job %s", msg_id)
                continue
            results.append((msg_id, job))
    return results
async def ack_email(client: aioredis.Redis, msg_id: str) -> None:
    """Acknowledge a processed message.

    Issues ``XACK`` for ``msg_id`` on ``STREAM_KEY`` within ``CONSUMER_GROUP``.
    """
    await client.xack(STREAM_KEY, CONSUMER_GROUP, msg_id)
@@ -1 +0,0 @@
"""Async email receipt worker consuming from DragonflyDB Streams."""
@@ -1,104 +0,0 @@
"""Async worker that consumes email receipt jobs from DragonflyDB Streams."""
import asyncio
import logging
from cartsnitch_common.database import get_async_session_factory
from cartsnitch_common.models.user import User
from sqlalchemy import select
from receiptwitness.config import settings
from receiptwitness.events import publish_receipt_ingested
from receiptwitness.parsers.email.base import BaseEmailParser, EmailReceipt
from receiptwitness.parsers.email.detector import detect_retailer
from receiptwitness.parsers.email.kroger import KrogerEmailParser
from receiptwitness.parsers.email.meijer import MeijerEmailParser
from receiptwitness.parsers.email.target import TargetEmailParser
from receiptwitness.queue.email import ack_email, consume_emails, get_redis
logger = logging.getLogger(__name__)
CONSUMER_NAME = "worker-1"
# Registry of available email parsers
PARSERS: dict[str, BaseEmailParser] = {
"meijer": MeijerEmailParser(),
"kroger": KrogerEmailParser(),
"target": TargetEmailParser(),
}
async def resolve_user(token: str) -> str | None:
    """Map an ``email_inbound_token`` to the owning user's ID.

    Returns:
        The user ID as a string, or ``None`` when no user has that token.
    """
    # NOTE(review): a session factory is requested on every call — confirm
    # get_async_session_factory caches its engine.
    make_session = get_async_session_factory(settings.database_url)
    query = select(User.id).where(User.email_inbound_token == token)
    async with make_session() as session:
        found = (await session.execute(query)).scalar_one_or_none()
        if not found:
            return None
        return str(found)
async def process_job(msg_id: str, job) -> bool:
    """Process a single email job.

    Steps: resolve the user from the inbound token, detect the retailer, parse
    the email with the matching parser and publish a receipt-ingested event.

    Args:
        msg_id: Stream message ID; also the fallback purchase ID when the
            parser could not find a receipt ID.
        job: The dequeued job; its ``user_id`` field carries the inbound token.

    Returns:
        True when the message should be acknowledged. This includes messages
        dropped for an unknown token or an unrecognized retailer, which are
        acknowledged to avoid retrying them. Parser and publish errors
        propagate to the caller.
    """
    # 1. Resolve user from token
    user_id = await resolve_user(job.user_id)  # user_id field holds token
    if not user_id:
        logger.warning("Unknown token %s, dropping message %s", job.user_id, msg_id)
        return True  # ack to avoid infinite retry

    # 2. Build EmailReceipt
    email = EmailReceipt(
        sender=job.sender,
        recipient=job.recipient,
        subject=job.subject,
        body_html=job.body_html,
        body_plain=job.body_plain,
        received_at=job.received_at,
    )

    # 3. Detect retailer
    retailer = detect_retailer(email)
    if not retailer or retailer not in PARSERS:
        logger.warning(
            "Unrecognized retailer from %s, archiving msg %s",
            job.sender,
            msg_id,
        )
        return True  # ack — no parser available

    # 4. Parse
    parser = PARSERS[retailer]
    parsed = parser.parse(email)

    # 5. Publish event
    await publish_receipt_ingested(
        user_id=user_id,
        store_slug=retailer,
        # Parsers always emit the key, using "" when no ID was found, so a
        # .get() default would never apply; `or` falls back on empty values.
        purchase_id=parsed.get("receipt_id") or msg_id,
        purchase_date=parsed.get("purchase_date", ""),
        item_count=len(parsed.get("items", [])),
        total=parsed.get("total", 0),
    )
    return True
async def run_worker() -> None:
    """Main worker loop — consume and process email jobs forever.

    Jobs are read in batches of up to five. A job is acknowledged only when
    ``process_job`` reports success; a failing job is logged and left
    unacknowledged without affecting the rest of the batch. Errors from the
    read itself are logged and followed by a 5 second pause.
    """
    client = await get_redis()
    logger.info("Email worker started, consuming from email:receipts")

    async def _handle(msg_id, job) -> None:
        # Isolate each job so one failure does not abort the batch.
        try:
            if await process_job(msg_id, job):
                await ack_email(client, msg_id)
        except Exception:
            logger.exception("Failed to process email job %s", msg_id)

    while True:
        try:
            batch = await consume_emails(client, CONSUMER_NAME, count=5, block_ms=5000)
            for msg_id, job in batch:
                await _handle(msg_id, job)
        except Exception:
            logger.exception("Worker loop error, retrying in 5s")
            await asyncio.sleep(5)
if __name__ == "__main__":
asyncio.run(run_worker())
-4
View File
@@ -1,16 +1,12 @@
"""Shared test fixtures."""
import json
import os
from pathlib import Path
import pytest
FIXTURES_DIR = Path(__file__).parent / "fixtures"
os.environ.setdefault("RW_SESSION_ENCRYPTION_KEY", "test-secret-key-for-unit-tests-only-32bytes!")
os.environ.setdefault("RW_MAILGUN_WEBHOOK_SIGNING_KEY", "test-mailgun-signing-key")
@pytest.fixture
def meijer_receipt_data() -> dict:
-45
View File
@@ -1,45 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Kroger Digital Receipt</title>
</head>
<body style="font-family: Arial, sans-serif; max-width: 600px; margin: 0 auto; padding: 20px; color: #333;">
<div style="background-color: #0057a8; color: white; padding: 20px; text-align: center;">
<img src="https://www.kroger.com/email-logo.png" alt="Kroger" style="height: 40px;">
<h1 style="margin: 10px 0; font-size: 24px;">Your Digital Receipt</h1>
<p style="margin: 0;">Kroger Plus Member</p>
</div>
<div style="padding: 20px; background-color: #f5f5f5;">
<h2 style="color: #0057a8; margin-top: 0;">Kroger #882 - Downtown</h2>
<p style="margin: 5px 0;">123 Main Street<br>Anytown, OH 45202</p>
<p style="margin: 5px 0;"><strong>Date:</strong> 03/15/2026</p>
<p style="margin: 5px 0;"><strong>Receipt #:</strong> KR-2026-0315-4829</p>
<p style="margin: 5px 0;"><strong>Transaction #:</strong> TXN-789123456</p>
</div>
<div style="padding: 20px;">
<h3>Items Purchased</h3>
<p>Whole Milk 1 Gallon $3.99</p>
<p>Sourdough Bread $4.49</p>
<p>Free Range Eggs 12ct $5.99</p>
<p>Baby Spinach 5oz $4.29</p>
</div>
<div style="padding: 20px; background-color: #e8f4e8; border-left: 4px solid #0057a8;">
<p style="margin: 5px 0;"><strong>Subtotal:</strong> $18.76</p>
<p style="margin: 5px 0;"><strong>Tax:</strong> $1.24</p>
<p style="margin: 5px 0; color: #0057a8; font-weight: bold; font-size: 18px;">Total: $20.00</p>
</div>
<div style="padding: 15px; margin-top: 15px; background-color: #fff8e1; border-left: 4px solid #ffc107;">
<p style="margin: 0; font-size: 14px; color: #666;">Kroger Plus Savings: <strong>$3.25</strong> saved on this order.</p>
</div>
<div style="padding: 20px; text-align: center; color: #999; font-size: 12px; margin-top: 20px;">
<p>Thank you for shopping at Kroger!</p>
<p>Keep your receipt for returns within 90 days.</p>
</div>
</body>
</html>
-127
View File
@@ -1,127 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Meijer Digital Receipt</title>
<style>
body { font-family: Arial, sans-serif; background: #f4f4f4; margin: 0; padding: 20px; }
.receipt-container { background: #ffffff; max-width: 600px; margin: 0 auto; padding: 30px; border: 1px solid #dddddd; }
.header { background: #003399; color: #ffffff; padding: 20px; text-align: center; margin: -30px -30px 20px -30px; }
.header h1 { margin: 0; font-size: 24px; }
.store-info { text-align: center; margin-bottom: 20px; border-bottom: 2px dashed #cccccc; padding-bottom: 15px; }
.store-info h2 { margin: 0; font-size: 18px; color: #003399; }
.receipt-meta { display: flex; justify-content: space-between; font-size: 14px; color: #555555; margin-bottom: 20px; }
table { width: 100%; border-collapse: collapse; margin-bottom: 20px; }
th { background: #f0f0f0; text-align: left; padding: 8px 10px; font-size: 13px; color: #333333; }
td { padding: 8px 10px; border-bottom: 1px solid #eeeeee; font-size: 14px; }
.item-name { font-weight: bold; }
.totals { margin-left: auto; width: 250px; }
.totals-row { display: flex; justify-content: space-between; padding: 6px 0; font-size: 14px; }
.totals-row.grand-total { font-weight: bold; font-size: 16px; border-top: 2px solid #333333; padding-top: 10px; margin-top: 4px; }
.savings { color: #cc0000; }
.footer { text-align: center; font-size: 12px; color: #888888; margin-top: 20px; padding-top: 15px; border-top: 1px solid #dddddd; }
</style>
</head>
<body>
<div class="receipt-container">
<div class="header">
<h1>MEIJER</h1>
<p style="margin: 5px 0 0; font-size: 14px;">Digital Receipt</p>
</div>
<div class="store-info">
<h2>Meijer Store #42</h2>
<p style="margin: 5px 0 0; font-size: 13px; color: #666;">1555 Lake Drive SE, Grand Rapids, MI 49506</p>
</div>
<div class="receipt-meta">
<div>
<strong>Date:</strong> March 15, 2026<br />
<strong>Time:</strong> 2:34 PM
</div>
<div style="text-align: right;">
<strong>Transaction #</strong><br />
TXN-2026-0315-0042
</div>
</div>
<table>
<thead>
<tr>
<th>Item</th>
<th style="text-align: center;">Qty</th>
<th style="text-align: right;">Price</th>
</tr>
</thead>
<tbody>
<tr>
<td class="item-name">ORGANIC BANANAS</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$0.69</td>
</tr>
<tr>
<td class="item-name">WHOLE MILK 1 GAL</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$4.29</td>
</tr>
<tr>
<td class="item-name">MEIJER WHOLE GRAIN OAT CEREAL 18OZ</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$4.99</td>
</tr>
<tr>
<td class="item-name">FRESH BROCCOLI CROWN</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$2.49</td>
</tr>
<tr>
<td class="item-name">GROUND BEEF 85/15 1LB</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$6.99</td>
</tr>
<tr>
<td class="item-name">SOURDOUGH BREAD</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$3.99</td>
</tr>
<tr>
<td class="item-name">MEIJER BABY SPINACH 5OZ</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$4.49</td>
</tr>
<tr>
<td class="item-name">LARGE EGGS DOZEN</td>
<td style="text-align: center;">1</td>
<td style="text-align: right;">$3.29</td>
</tr>
</tbody>
</table>
<div class="totals">
<div class="totals-row">
<span>Subtotal</span>
<span>$31.22</span>
</div>
<div class="totals-row">
<span>Tax</span>
<span>$2.19</span>
</div>
<div class="totals-row savings">
<span>Total Savings</span>
<span>-$3.40</span>
</div>
<div class="totals-row grand-total">
<span>Total</span>
<span>$33.41</span>
</div>
</div>
<div class="footer">
<p>Thank you for shopping at Meijer!</p>
<p>Keep your receipt for your records.<br />
Questions? Call 1-800-927-8699 or visit meijer.com</p>
</div>
</div>
</body>
</html>
-44
View File
@@ -1,44 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>Target Order Confirmation</title>
</head>
<body style="font-family: Arial, sans-serif; max-width: 600px; margin: 0 auto; padding: 20px; color: #333;">
<div style="background-color: #cc0000; color: white; padding: 20px; text-align: center;">
<img src="https://assets.target.com/email-logo.png" alt="Target" style="height: 40px;">
<h1 style="margin: 10px 0; font-size: 24px;">Order Confirmation</h1>
<p style="margin: 0;">Thanks for shopping Target Circle!</p>
</div>
<div style="padding: 20px; background-color: #f5f5f5;">
<h2 style="color: #cc0000; margin-top: 0;">Target Store #1247 - Riverside</h2>
<p style="margin: 5px 0;">4500 River Road<br>Columbus, OH 43220</p>
<p style="margin: 5px 0;"><strong>Date:</strong> 03/18/2026</p>
<p style="margin: 5px 0;"><strong>Order #:</strong> TGT-2026-0318-9124</p>
<p style="margin: 5px 0;"><strong>Confirmation #:</strong> CNF-44772819</p>
</div>
<div style="padding: 20px;">
<h3>Items Purchased</h3>
<p>Good & Gather Whole Milk 1 Gal $3.89</p>
<p>Arborio Rice 2lb bag $6.49</p>
<p>Parmesan Wedge 8oz $7.99</p>
</div>
<div style="padding: 20px; background-color: #fff8e1; border-left: 4px solid #cc0000;">
<p style="margin: 5px 0;"><strong>Subtotal:</strong> $18.37</p>
<p style="margin: 5px 0;"><strong>Tax:</strong> $1.45</p>
<p style="margin: 5px 0; color: #cc0000; font-weight: bold; font-size: 18px;">Total: $19.82</p>
</div>
<div style="padding: 15px; margin-top: 15px; background-color: #e8f4e8; border-left: 4px solid #4caf50;">
<p style="margin: 0; font-size: 14px; color: #333;">Target Circle offer saved you <strong>$0.30</strong> on this order.</p>
</div>
<div style="padding: 20px; text-align: center; color: #999; font-size: 12px; margin-top: 20px;">
<p>Questions? Call Target Guest Services at 1-800-591-3869.</p>
<p>Receipt valid for returns within 30 days.</p>
</div>
</body>
</html>
@@ -1 +0,0 @@
"""Tests for the ReceiptWitness API routes."""
@@ -1,125 +0,0 @@
"""Tests for the /inbound/email webhook endpoint."""
import hashlib
import hmac
import time
from unittest.mock import AsyncMock, patch
import pytest
from fastapi.testclient import TestClient
from receiptwitness.main import app
@pytest.fixture
def client():
    """HTTP test client bound to the ReceiptWitness FastAPI app."""
    test_client = TestClient(app)
    return test_client
@pytest.fixture
def mock_redis():
    """Replace the route's Redis client and ``enqueue_email`` with async mocks.

    Yields a dict with the fake client under ``"redis"`` and the patched
    enqueue function under ``"enqueue"``.
    """
    fake_client = AsyncMock()
    with patch("receiptwitness.api.routes.get_redis", return_value=fake_client):
        with patch(
            "receiptwitness.api.routes.enqueue_email", new_callable=AsyncMock
        ) as fake_enqueue:
            yield {"redis": fake_client, "enqueue": fake_enqueue}
def make_signature(signing_key: str, token: str, timestamp: str) -> str:
    """Return the hex HMAC-SHA256 of ``timestamp + token`` keyed by ``signing_key``."""
    message = f"{timestamp}{token}".encode()
    digest = hmac.new(signing_key.encode(), message, hashlib.sha256)
    return digest.hexdigest()
def valid_form(signing_key: str = "test-secret"):
    """Build a webhook form payload signed with ``signing_key`` at the current time."""
    token = "test-token"
    timestamp = str(int(time.time()))
    form = {
        "token": token,
        "timestamp": timestamp,
        "signature": make_signature(signing_key, token, timestamp),
    }
    form.update(
        {
            "sender": "sender@example.com",
            "recipient": "receipts+user123@example.com",
            "subject": "Your Meijer Receipt",
            "body-html": "<p>Thank you for shopping at Meijer</p>",
            "body-plain": "Thank you for shopping at Meijer",
            "Message-Id": "<msg-001@example.com>",
        }
    )
    return form
def test_valid_webhook(client, mock_redis):
    """A correctly signed POST is accepted and enqueued exactly once."""
    with patch("receiptwitness.api.routes.settings") as cfg:
        cfg.mailgun_webhook_signing_key = "test-secret"
        resp = client.post("/inbound/email", data=valid_form())
        assert resp.status_code == 200
        assert resp.json() == {"status": "queued"}
        mock_redis["enqueue"].assert_awaited_once()
def test_invalid_signature(client, mock_redis):
    """A tampered signature is rejected with 406 and nothing is enqueued."""
    with patch("receiptwitness.api.routes.settings") as cfg:
        cfg.mailgun_webhook_signing_key = "test-secret"
        payload = valid_form()
        payload["signature"] = "wrong-signature"
        resp = client.post("/inbound/email", data=payload)
        assert resp.status_code == 406
        assert resp.json()["detail"] == "Invalid signature"
        mock_redis["enqueue"].assert_not_awaited()
def test_invalid_recipient_no_plus(client, mock_redis):
    """A recipient without a plus-address part is rejected with 406."""
    with patch("receiptwitness.api.routes.settings") as cfg:
        cfg.mailgun_webhook_signing_key = "test-secret"
        payload = valid_form()
        payload["recipient"] = "receipts@example.com"  # no plus-address
        resp = client.post("/inbound/email", data=payload)
        assert resp.status_code == 406
        assert resp.json()["detail"] == "Invalid recipient"
        mock_redis["enqueue"].assert_not_awaited()
def test_stale_timestamp(client, mock_redis):
    """A validly signed request with a 10-minute-old timestamp is rejected."""
    with patch("receiptwitness.api.routes.settings") as cfg:
        cfg.mailgun_webhook_signing_key = "test-secret"
        stale_ts = str(int(time.time()) - 600)  # 10 min old
        token = "test-token"
        payload = {
            "token": token,
            "timestamp": stale_ts,
            "signature": make_signature("test-secret", token, stale_ts),
            "sender": "sender@example.com",
            "recipient": "receipts+user123@example.com",
            "subject": "Receipt",
        }
        resp = client.post("/inbound/email", data=payload)
        assert resp.status_code == 406
        assert resp.json()["detail"] == "Invalid signature"
        mock_redis["enqueue"].assert_not_awaited()
def test_invalid_timestamp_returns_406(client, mock_redis):
    """Empty timestamp should return 406, not 500."""
    payload = {
        "token": "test-token",
        "timestamp": "",
        "signature": "any-sig",
        "sender": "sender@example.com",
        "recipient": "receipts+user123@example.com",
        "subject": "Receipt",
    }
    with patch("receiptwitness.api.routes.settings") as cfg:
        cfg.mailgun_webhook_signing_key = "test-secret"
        resp = client.post("/inbound/email", data=payload)
        assert resp.status_code == 406
        assert resp.json()["detail"] == "Invalid signature"
        mock_redis["enqueue"].assert_not_awaited()
def test_get_inbound_email_returns_405(client):
    """GET /inbound/email is not allowed."""
    resp = client.get("/inbound/email")
    assert resp.status_code == 405
-46
View File
@@ -1,46 +0,0 @@
import pytest
from receiptwitness.config import ReceiptWitnessSettings
def test_valid_config():
    """A real-looking encryption key is accepted and stored."""
    key = "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8="
    cfg = ReceiptWitnessSettings(session_encryption_key=key)
    assert cfg.session_encryption_key
def test_missing_session_encryption_key_raises():
    """An empty encryption key is rejected with a message naming the env var."""
    empty_key = ""
    with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
        ReceiptWitnessSettings(session_encryption_key=empty_key)
def test_placeholder_session_encryption_key_raises():
    """The shipped placeholder key is rejected like a missing one."""
    placeholder = "change-me-in-production"
    with pytest.raises(ValueError, match="RW_SESSION_ENCRYPTION_KEY"):
        ReceiptWitnessSettings(session_encryption_key=placeholder)
def test_notifications_enabled_without_resend_key_raises():
    """Enabling notifications without a Resend API key is a config error."""
    kwargs = {
        "session_encryption_key": "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
        "notifications_enabled": True,
        "resend_api_key": "",
    }
    with pytest.raises(ValueError, match="RW_RESEND_API_KEY"):
        ReceiptWitnessSettings(**kwargs)
def test_notifications_disabled_without_resend_key_ok():
    """With notifications off, a missing Resend API key is fine."""
    kwargs = {
        "session_encryption_key": "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
        "notifications_enabled": False,
        "resend_api_key": "",
    }
    cfg = ReceiptWitnessSettings(**kwargs)
    assert cfg.notifications_enabled is False
def test_notifications_enabled_with_resend_key_ok():
    """Notifications may be enabled once a Resend API key is supplied."""
    kwargs = {
        "session_encryption_key": "7reF42nmTwbdN21PBoubGp7h_FU8qSimstmlaMLoRK8=",
        "notifications_enabled": True,
        "resend_api_key": "re_test_1234567890",
    }
    cfg = ReceiptWitnessSettings(**kwargs)
    assert cfg.resend_api_key == "re_test_1234567890"
@@ -1,84 +0,0 @@
"""Tests for email notifications."""
from unittest.mock import patch
import pytest
class TestSendReceiptNotification:
    """Behavior of ``send_receipt_notification`` under different settings."""

    # Arguments shared by every call to the function under test.
    RECEIPT = {
        "user_email": "user@example.com",
        "store_name": "Meijer",
        "item_count": 5,
        "total": 42.99,
        "purchase_date": "2026-03-28",
    }

    @pytest.fixture
    def mock_resend(self):
        """Patch the ``resend`` module used by the notifications code."""
        with patch("receiptwitness.notifications.email.resend") as mock:
            yield mock

    @pytest.mark.asyncio
    async def test_sends_email_with_correct_params(self, mock_resend):
        """An enabled, configured setup sends exactly one well-formed email."""
        from receiptwitness.notifications.email import send_receipt_notification

        def run_inline(fn, *args, **kwargs):
            # Stand-in for asyncio.to_thread: call the function directly.
            return fn(*args, **kwargs)

        with (
            patch("receiptwitness.notifications.email.settings") as mock_settings,
            patch(
                "receiptwitness.notifications.email.asyncio.to_thread",
                new=run_inline,
            ),
        ):
            mock_settings.notifications_enabled = True
            mock_settings.resend_api_key = "re_testkey_123"
            mock_settings.notification_email_from = "noreply@test.com"
            await send_receipt_notification(**self.RECEIPT)

        expected_html = (
            "<p>Your receipt from <strong>Meijer</strong> on "
            "2026-03-28 has been processed.</p>"
            "<p>5 items, total: $42.99</p>"
        )
        mock_resend.Emails.send.assert_called_once_with(
            {
                "from": "noreply@test.com",
                "to": ["user@example.com"],
                "subject": "Receipt processed: Meijer - $42.99",
                "html": expected_html,
            }
        )

    @pytest.mark.asyncio
    async def test_skips_when_disabled(self, mock_resend):
        """Nothing is sent while notifications are disabled."""
        from receiptwitness.notifications.email import send_receipt_notification

        with patch("receiptwitness.notifications.email.settings") as mock_settings:
            mock_settings.notifications_enabled = False
            mock_settings.resend_api_key = "re_testkey_123"
            await send_receipt_notification(**self.RECEIPT)

        mock_resend.Emails.send.assert_not_called()

    @pytest.mark.asyncio
    async def test_skips_when_api_key_empty(self, mock_resend):
        """Nothing is sent when the API key is empty, even if enabled."""
        from receiptwitness.notifications.email import send_receipt_notification

        with patch("receiptwitness.notifications.email.settings") as mock_settings:
            mock_settings.notifications_enabled = True
            mock_settings.resend_api_key = ""
            await send_receipt_notification(**self.RECEIPT)

        mock_resend.Emails.send.assert_not_called()
@@ -1,49 +0,0 @@
"""Tests for retailer detector."""
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.detector import detect_retailer
def test_detect_meijer():
    """A meijer.com sender is detected as ``meijer``."""
    message = EmailReceipt(
        sender="receipts@meijer.com",
        recipient="user@example.com",
        subject="Your Receipt",
    )
    detected = detect_retailer(message)
    assert detected == "meijer"
def test_detect_kroger():
    """A kroger.com subdomain sender is detected as ``kroger``."""
    message = EmailReceipt(
        sender="noreply@email.kroger.com",
        recipient="user@example.com",
        subject="Your Receipt",
    )
    detected = detect_retailer(message)
    assert detected == "kroger"
def test_detect_target():
    """A ``Name <address>`` style Target sender is detected as ``target``."""
    message = EmailReceipt(
        sender="Target <receipts@target.com>",
        recipient="user@example.com",
        subject="Your Receipt",
    )
    detected = detect_retailer(message)
    assert detected == "target"
def test_detect_unknown():
    """An unsupported retailer yields ``None``."""
    message = EmailReceipt(
        sender="noreply@walmart.com",
        recipient="user@example.com",
        subject="Your Receipt",
    )
    detected = detect_retailer(message)
    assert detected is None
def test_detect_case_insensitive():
    """Sender matching ignores letter case."""
    message = EmailReceipt(
        sender="Receipts@MEIJER.COM",
        recipient="user@example.com",
        subject="Your Receipt",
    )
    detected = detect_retailer(message)
    assert detected == "meijer"
@@ -1,93 +0,0 @@
"""Tests for KrogerEmailParser."""
from pathlib import Path
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.kroger import KrogerEmailParser
FIXTURE_PATH = Path(__file__).parent.parent.parent / "fixtures" / "kroger_email_receipt.html"
class TestKrogerEmailParser:
    """Tests for KrogerEmailParser."""

    def setup_method(self) -> None:
        self.parser = KrogerEmailParser()
        self.fixture_html = FIXTURE_PATH.read_text()

    def _parse_fixture(self) -> dict:
        """Parse the Kroger fixture email shared by the ``test_parse_*`` cases."""
        message = EmailReceipt(
            sender="noreply@kroger.com",
            recipient="user@example.com",
            subject="Your Kroger Receipt",
            body_html=self.fixture_html,
        )
        return self.parser.parse(message)

    def test_can_parse_kroger_sender(self) -> None:
        message = EmailReceipt(
            sender="noreply@email.kroger.com",
            recipient="user@example.com",
            subject="Your Kroger Receipt",
            body_html=self.fixture_html,
        )
        assert self.parser.can_parse(message) is True

    def test_can_parse_kroger_in_body(self) -> None:
        message = EmailReceipt(
            sender="someone@unknown.com",
            recipient="user@example.com",
            subject="Your Receipt",
            body_html="<html><body>Kroger digital receipt</body></html>",
        )
        assert self.parser.can_parse(message) is True

    def test_cannot_parse_unrelated(self) -> None:
        message = EmailReceipt(
            sender="noreply@walmart.com",
            recipient="user@example.com",
            subject="Your Receipt",
            body_html="<html><body>Walmart receipt</body></html>",
        )
        assert self.parser.can_parse(message) is False

    def test_parse_items(self) -> None:
        items = self._parse_fixture().get("items", [])
        assert len(items) >= 3
        names = [entry["product_name_raw"] for entry in items]
        assert any("Whole Milk" in name for name in names)
        assert any("Sourdough" in name for name in names)
        for entry in items:
            assert "unit_price" in entry
            assert "extended_price" in entry

    def test_parse_totals(self) -> None:
        total = self._parse_fixture().get("total", 0)
        assert total > 0

    def test_parse_receipt_id(self) -> None:
        receipt_id = self._parse_fixture().get("receipt_id", "")
        assert "KR-2026" in receipt_id or "TXN" in receipt_id

    def test_parse_date(self) -> None:
        purchase_date = self._parse_fixture().get("purchase_date", "")
        assert purchase_date == "2026-03-15"
@@ -1,182 +0,0 @@
"""Tests for the Meijer email receipt parser."""
import os
from decimal import Decimal
import pytest
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.meijer import MeijerEmailParser
FIXTURE_PATH = os.path.join(
os.path.dirname(__file__), "..", "..", "fixtures", "meijer_email_receipt.html"
)
def load_fixture() -> str:
    """Return the text of the Meijer receipt HTML fixture at ``FIXTURE_PATH``."""
    # Decode explicitly so the fixture reads the same on every platform
    # instead of depending on the locale's preferred encoding.
    # NOTE(review): assumes the fixture file is stored as UTF-8 — confirm.
    with open(FIXTURE_PATH, encoding="utf-8") as fixture_file:
        return fixture_file.read()
@pytest.fixture
def meijer_email() -> EmailReceipt:
    """Meijer receipt email whose HTML body is the on-disk fixture."""
    receipt = EmailReceipt(
        sender="Meijer Receipts <receipts@email.meijer.com>",
        recipient="shopper@example.com",
        subject="Your Meijer Receipt — Transaction #TXN-2026-0315-0042",
        body_html=load_fixture(),
        body_plain=None,
        received_at="2026-03-15T14:34:00Z",
    )
    return receipt
@pytest.fixture
def kroger_email() -> EmailReceipt:
    """Minimal Kroger email used as a negative case for the Meijer parser."""
    receipt = EmailReceipt(
        body_html="<html><body>Kroger receipt</body></html>",
        subject="Your Kroger Receipt",
        recipient="shopper@example.com",
        sender="Kroger <noreply@email.kroger.com>",
    )
    return receipt
class TestCanParse:
    """Checks which senders ``MeijerEmailParser.can_parse`` accepts."""

    def test_can_parse_meijer(self, meijer_email: EmailReceipt):
        """The Meijer fixture email is accepted."""
        accepted = MeijerEmailParser().can_parse(meijer_email)
        assert accepted is True

    def test_cannot_parse_kroger(self, kroger_email: EmailReceipt):
        """The Kroger fixture email is rejected."""
        accepted = MeijerEmailParser().can_parse(kroger_email)
        assert accepted is False

    def test_can_parse_meijer_plain_sender(self):
        """A bare ``receipts@meijer.com`` address (no display name) is accepted."""
        message = EmailReceipt(
            body_html="<html></html>",
            subject="Receipt",
            recipient="shopper@example.com",
            sender="receipts@meijer.com",
        )
        accepted = MeijerEmailParser().can_parse(message)
        assert accepted is True

    def test_cannot_parse_non_meijer(self):
        """A Target sender is rejected."""
        message = EmailReceipt(
            body_html="<html></html>",
            subject="Target Receipt",
            recipient="shopper@example.com",
            sender=" Target <no-reply@target.com>",
        )
        accepted = MeijerEmailParser().can_parse(message)
        assert accepted is False
class TestParseMeijerReceipt:
    """Field-level expectations for parsing the Meijer fixture receipt."""

    def test_receipt_id_extracted(self, meijer_email: EmailReceipt):
        """The transaction id is extracted as the receipt id."""
        parsed = MeijerEmailParser().parse(meijer_email)
        assert parsed["receipt_id"] == "TXN-2026-0315-0042"

    def test_purchase_date_extracted(self, meijer_email: EmailReceipt):
        """The purchase date comes back as an ISO ``YYYY-MM-DD`` string."""
        parsed = MeijerEmailParser().parse(meijer_email)
        assert parsed["purchase_date"] == "2026-03-15"

    def test_items_extracted(self, meijer_email: EmailReceipt):
        """All eight line items are returned, including three spot-checked names."""
        parsed = MeijerEmailParser().parse(meijer_email)
        line_items = parsed["items"]
        assert len(line_items) == 8
        raw_names = [entry["product_name_raw"] for entry in line_items]
        assert "ORGANIC BANANAS" in raw_names
        assert "WHOLE MILK 1 GAL" in raw_names
        assert "GROUND BEEF 85/15 1LB" in raw_names

    def test_item_quantities(self, meijer_email: EmailReceipt):
        """The bananas line item has a quantity of exactly one."""
        parsed = MeijerEmailParser().parse(meijer_email)
        # First item whose raw name mentions BANANAS.
        banana_rows = (row for row in parsed["items"] if "BANANAS" in row["product_name_raw"])
        bananas = next(banana_rows)
        assert bananas["quantity"] == Decimal("1")

    def test_item_prices(self, meijer_email: EmailReceipt):
        """Unit and extended price of the bananas line item are both 0.69."""
        parsed = MeijerEmailParser().parse(meijer_email)
        # First item whose raw name mentions BANANAS.
        banana_rows = (row for row in parsed["items"] if "BANANAS" in row["product_name_raw"])
        bananas = next(banana_rows)
        assert bananas["unit_price"] == Decimal("0.69")
        assert bananas["extended_price"] == Decimal("0.69")

    def test_totals(self, meijer_email: EmailReceipt):
        """Total, subtotal, tax and savings match the fixture receipt."""
        parsed = MeijerEmailParser().parse(meijer_email)
        assert parsed["total"] == Decimal("33.41")
        assert parsed["subtotal"] == Decimal("31.22")
        assert parsed["tax"] == Decimal("2.19")
        assert parsed["savings_total"] == Decimal("3.40")
class TestParseHandlesMissingFields:
    """Parsing of Meijer emails that lack a body, subject, or totals."""

    def test_missing_body_html_falls_back_to_plain(self):
        """With no HTML body, the date and total are read from the plain-text body."""
        message = EmailReceipt(
            sender="receipts@email.meijer.com",
            recipient="shopper@example.com",
            subject="Your Meijer Receipt",
            body_html=None,
            body_plain="TXN-1234 | March 15, 2026 | Total: $10.00",
        )
        parsed = MeijerEmailParser().parse(message)
        # Parsing must not raise; a minimal result is returned.
        assert parsed["receipt_id"] == ""
        assert parsed["purchase_date"] == "2026-03-15"
        assert parsed["total"] == Decimal("10.00")

    def test_empty_email(self):
        """Empty HTML and plain bodies produce empty/zero defaults and no items."""
        message = EmailReceipt(
            sender="receipts@email.meijer.com",
            recipient="shopper@example.com",
            subject="Receipt",
            body_html="",
            body_plain="",
        )
        parsed = MeijerEmailParser().parse(message)
        assert parsed["receipt_id"] == ""
        assert parsed["purchase_date"] == ""
        assert parsed["total"] == Decimal("0")
        assert parsed["items"] == []

    def test_missing_subject_date_from_body(self):
        """With ``subject=None`` the purchase date is taken from the HTML body."""
        html = """
<html>
<body>
<p>Thank you for shopping on April 1, 2026</p>
<p>Total: $15.00</p>
</body>
</html>
"""
        message = EmailReceipt(
            sender="receipts@email.meijer.com",
            recipient="shopper@example.com",
            subject=None,
            body_html=html,
        )
        parsed = MeijerEmailParser().parse(message)
        assert parsed["purchase_date"] == "2026-04-01"

    def test_missing_totals_defaults_to_zero(self):
        """Without any totals, ``total`` is zero and ``subtotal``/``tax`` are ``None``."""
        message = EmailReceipt(
            sender="receipts@email.meijer.com",
            recipient="shopper@example.com",
            subject="Receipt",
            body_html="<html><body><p>Just an email with no totals</p></body></html>",
        )
        parsed = MeijerEmailParser().parse(message)
        assert parsed["total"] == Decimal("0")
        assert parsed["subtotal"] is None
        assert parsed["tax"] is None
@@ -1,93 +0,0 @@
"""Tests for TargetEmailParser."""
from pathlib import Path
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.parsers.email.target import TargetEmailParser
FIXTURE_PATH = Path(__file__).parent.parent.parent / "fixtures" / "target_email_receipt.html"
class TestTargetEmailParser:
    """Tests for TargetEmailParser."""

    def setup_method(self) -> None:
        """Create a fresh parser and load the Target HTML fixture before each test."""
        self.parser = TargetEmailParser()
        # Decode explicitly so the fixture reads the same on every platform
        # instead of depending on the locale's preferred encoding.
        # NOTE(review): assumes the fixture file is stored as UTF-8 — confirm.
        self.fixture_html = FIXTURE_PATH.read_text(encoding="utf-8")

    def _fixture_email(self) -> EmailReceipt:
        """Return the Target order email shared by the ``test_parse_*`` tests."""
        return EmailReceipt(
            sender="orders@target.com",
            recipient="user@example.com",
            subject="Your Target Order",
            body_html=self.fixture_html,
        )

    def test_can_parse_target_sender(self) -> None:
        """An email sent from target.com is accepted by the parser."""
        email = EmailReceipt(
            sender="receipts@target.com",
            recipient="user@example.com",
            subject="Your Target Order Confirmation",
            body_html=self.fixture_html,
        )
        assert self.parser.can_parse(email) is True

    def test_can_parse_circle_in_body(self) -> None:
        """An unknown sender is still accepted when the body mentions Target Circle."""
        email = EmailReceipt(
            sender="someone@unknown.com",
            recipient="user@example.com",
            subject="Your Receipt",
            body_html="<html><body>Target Circle savings offer</body></html>",
        )
        assert self.parser.can_parse(email) is True

    def test_cannot_parse_unrelated(self) -> None:
        """A Walmart email with no Target markers is rejected."""
        email = EmailReceipt(
            sender="noreply@walmart.com",
            recipient="user@example.com",
            subject="Your Receipt",
            body_html="<html><body>Walmart receipt</body></html>",
        )
        assert self.parser.can_parse(email) is False

    def test_parse_items(self) -> None:
        """The fixture yields at least three items, each carrying price fields."""
        result = self.parser.parse(self._fixture_email())
        items = result.get("items", [])
        assert len(items) >= 3
        product_names = [item["product_name_raw"] for item in items]
        assert any("Whole Milk" in name for name in product_names)
        assert any("Arborio" in name for name in product_names)
        for item in items:
            assert "unit_price" in item
            assert "extended_price" in item

    def test_parse_totals(self) -> None:
        """The parsed total of the fixture order is positive."""
        result = self.parser.parse(self._fixture_email())
        total = result.get("total", 0)
        assert total > 0

    def test_parse_receipt_id(self) -> None:
        """The parsed receipt id contains either ``TGT-2026`` or ``CNF``."""
        result = self.parser.parse(self._fixture_email())
        receipt_id = result.get("receipt_id", "")
        assert "TGT-2026" in receipt_id or "CNF" in receipt_id

    def test_parse_date(self) -> None:
        """The purchase date of the fixture order is ``2026-03-18``."""
        result = self.parser.parse(self._fixture_email())
        purchase_date = result.get("purchase_date", "")
        assert purchase_date == "2026-03-18"
@@ -1,79 +0,0 @@
"""Tests for email queue using DragonflyDB Streams."""
import pytest
from fakeredis import aioredis as fake_aioredis
from receiptwitness.queue.email import (
CONSUMER_GROUP,
STREAM_KEY,
EmailJob,
ack_email,
consume_emails,
enqueue_email,
ensure_consumer_group,
)
@pytest.fixture
async def fake_client():
    """Provide a fakeredis async client and close it once the test finishes."""
    # decode_responses=True makes the client return str values rather than bytes.
    redis_stub = fake_aioredis.FakeRedis(decode_responses=True)
    yield redis_stub
    await redis_stub.aclose()
@pytest.fixture
def sample_job():
    """A fully populated Kroger ``EmailJob`` for the queue round-trip tests."""
    job = EmailJob(
        user_id="user-123",
        message_id="msg-abc-123",
        received_at="2026-04-01T12:00:00Z",
        sender="no-reply@kroger.com",
        recipient="user@example.com",
        subject="Kroger Receipt",
        body_html="<html><body>Receipt</body></html>",
        body_plain="Receipt",
    )
    return job
@pytest.mark.asyncio
async def test_enqueue_and_consume(fake_client, sample_job):
    """A job that is enqueued comes back from the consumer with the same fields."""
    enqueued_id = await enqueue_email(fake_client, sample_job)
    assert enqueued_id is not None

    batch = await consume_emails(fake_client, "test-worker", count=1, block_ms=100)
    assert len(batch) == 1

    first_entry = batch[0]
    delivered_id, delivered_job = first_entry
    assert delivered_id == enqueued_id
    assert delivered_job.user_id == sample_job.user_id
    assert delivered_job.sender == sample_job.sender
    assert delivered_job.recipient == sample_job.recipient
    assert delivered_job.subject == sample_job.subject
    assert delivered_job.message_id == sample_job.message_id
@pytest.mark.asyncio
async def test_ack_removes_from_pending(fake_client, sample_job):
    """Acknowledging a consumed message leaves nothing pending for the group."""
    enqueued_id = await enqueue_email(fake_client, sample_job)

    # Reading through the consumer group puts the message on the pending list.
    batch = await consume_emails(fake_client, "test-worker", count=1, block_ms=100)
    assert len(batch) == 1

    await ack_email(fake_client, enqueued_id)

    # The group summary may be None, or a mapping with a "pending" count.
    summary = await fake_client.xpending(STREAM_KEY, CONSUMER_GROUP)
    if summary is not None:
        assert summary["pending"] == 0
@pytest.mark.asyncio
async def test_ensure_consumer_group_idempotent(fake_client):
    """Creating the consumer group a second time must not raise."""
    for _attempt in range(2):
        # The second pass hits the already-existing group.
        await ensure_consumer_group(fake_client)
@@ -1,188 +0,0 @@
"""Tests for email_worker."""
from decimal import Decimal
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from fakeredis import aioredis as fake_aioredis
from receiptwitness.parsers.email.base import EmailReceipt
from receiptwitness.queue.email import (
EmailJob,
)
from receiptwitness.worker.email_worker import (
process_job,
resolve_user,
)
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
async def fake_redis():
    """Provide a fakeredis async client for queue tests; closed on teardown."""
    # decode_responses=True makes the client return str values rather than bytes.
    redis_stub = fake_aioredis.FakeRedis(decode_responses=True)
    yield redis_stub
    await redis_stub.aclose()
@pytest.fixture
def sample_email_job():
    """A Meijer ``EmailJob`` as it would arrive from the email queue."""
    job = EmailJob(
        user_id="token-abc-123",
        message_id="msg-xyz-789",
        received_at="2026-04-01T12:00:00Z",
        sender="no-reply@meijer.com",
        recipient="user@example.com",
        subject="Your Meijer Receipt",
        body_html="<html><body>Total: $42.00</body></html>",
        body_plain="Total: $42.00",
    )
    return job
@pytest.fixture
def sample_email():
    """A Meijer ``EmailReceipt`` with a total and a receipt number in its HTML body."""
    receipt = EmailReceipt(
        received_at="2026-04-01T12:00:00Z",
        sender="no-reply@meijer.com",
        recipient="user@example.com",
        subject="Your Meijer Receipt",
        body_html="<html><body>Total: $42.00<br/>Receipt #12345</body></html>",
        body_plain="Total: $42.00",
    )
    return receipt
# ---------------------------------------------------------------------------
# resolve_user tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_resolve_user_valid_token():
    """When the session query returns a user id, ``resolve_user`` returns it."""
    query_result = MagicMock()
    query_result.scalar_one_or_none.return_value = "user-uuid-42"

    db_session = AsyncMock()
    db_session.execute.return_value = query_result
    # Make ``async with factory() as session`` hand back this same mock.
    db_session.__aenter__ = AsyncMock(return_value=db_session)
    db_session.__aexit__ = AsyncMock(return_value=None)
    session_factory = MagicMock(return_value=db_session)

    factory_getter = "receiptwitness.worker.email_worker.get_async_session_factory"
    with patch(factory_getter, return_value=session_factory):
        resolved = await resolve_user("token-abc-123")

    assert resolved == "user-uuid-42"
    session_factory.assert_called_once()
@pytest.mark.asyncio
async def test_resolve_user_invalid_token():
    """When the session query finds no row, ``resolve_user`` returns ``None``."""
    query_result = MagicMock()
    query_result.scalar_one_or_none.return_value = None

    db_session = AsyncMock()
    db_session.execute.return_value = query_result
    # Make ``async with factory() as session`` hand back this same mock.
    db_session.__aenter__ = AsyncMock(return_value=db_session)
    db_session.__aexit__ = AsyncMock(return_value=None)
    session_factory = MagicMock(return_value=db_session)

    factory_getter = "receiptwitness.worker.email_worker.get_async_session_factory"
    with patch(factory_getter, return_value=session_factory):
        resolved = await resolve_user("bad-token")

    assert resolved is None
# ---------------------------------------------------------------------------
# process_job tests
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_process_job_unknown_retailer(sample_email_job):
    """A job from an unrecognised sender returns True and publishes no event."""
    stray_job = EmailJob(
        user_id="token-abc-123",
        message_id="msg-xyz-789",
        received_at="2026-04-01T12:00:00Z",
        sender="no-reply@unknownretailer.com",
        recipient="user@example.com",
        subject="Receipt",
        body_html="<html></html>",
        body_plain="",
    )

    resolve_patch = patch(
        "receiptwitness.worker.email_worker.resolve_user",
        return_value="user-uuid-42",
    )
    publish_patch = patch(
        "receiptwitness.worker.email_worker.publish_receipt_ingested",
        new_callable=AsyncMock,
    )
    with resolve_patch, publish_patch as publish_spy:
        outcome = await process_job("msg-id-1", stray_job)

    assert outcome is True
    publish_spy.assert_not_called()
@pytest.mark.asyncio
async def test_process_job_success(sample_email_job, sample_email):
    """A Meijer job is parsed once and published with the parsed receipt data."""
    banana_line = {
        "product_name_raw": "ORGANIC BANANAS",
        "quantity": Decimal("1"),
        "unit_price": Decimal("0.69"),
        "extended_price": Decimal("0.69"),
    }
    parser_output = {
        "receipt_id": "RCP-999",
        "purchase_date": "2026-04-01",
        "total": Decimal("42.00"),
        "items": [banana_line],
    }
    stub_parser = MagicMock()
    stub_parser.parse.return_value = parser_output

    resolve_patch = patch(
        "receiptwitness.worker.email_worker.resolve_user",
        return_value="user-uuid-42",
    )
    # Swap in the stub for the "meijer" entry only; other parsers are left alone.
    parsers_patch = patch.dict(
        "receiptwitness.worker.email_worker.PARSERS",
        {"meijer": stub_parser},
        clear=False,
    )
    publish_patch = patch(
        "receiptwitness.worker.email_worker.publish_receipt_ingested",
        new_callable=AsyncMock,
    )
    with resolve_patch, parsers_patch, publish_patch as publish_spy:
        outcome = await process_job("msg-id-1", sample_email_job)

    assert outcome is True
    stub_parser.parse.assert_called_once()
    publish_spy.assert_called_once_with(
        user_id="user-uuid-42",
        store_slug="meijer",
        purchase_id="RCP-999",
        purchase_date="2026-04-01",
        item_count=1,
        total=Decimal("42.00"),
    )
-61
View File
@@ -1,61 +0,0 @@
# seed-dev-job.yaml
# K8s Job to run the CartSnitch seed runner against the dev database.
#
# Usage:
# kubectl apply -f seed-dev-job.yaml -n cartsnitch-dev
#
# To view logs:
# kubectl logs -n cartsnitch-dev job/seed-dev -f
#
# To re-run after fixing issues:
# kubectl delete -f seed-dev-job.yaml -n cartsnitch-dev && kubectl apply -f seed-dev-job.yaml -n cartsnitch-dev
#
apiVersion: batch/v1
kind: Job
metadata:
  name: seed-dev
  namespace: cartsnitch-dev
  labels:
    app: cartsnitch
    component: seed
    environment: dev
  annotations:
    description: "Runs cartsnitch-common seed runner to populate dev database with realistic test data."
spec:
  # Prevent retries — a failed seed run should be investigated, not auto-repeated.
  backoffLimit: 0
  # `concurrencyPolicy` is deliberately absent: it is a CronJob field, not a
  # Job field, and strict validation rejects it as unknown. A Job named
  # seed-dev can only exist once per namespace, so a second run requires
  # deleting the previous Job first (see the usage notes at the top of this file).
  template:
    metadata:
      labels:
        app: cartsnitch
        component: seed
        environment: dev
    spec:
      restartPolicy: Never
      containers:
        - name: seed
          # Use slim Python image with the cartsnitch-common package installed from git.
          # The common repo is public; no additional secret is needed for the pip install.
          image: python:3.12-slim
          command:
            - sh
            - -c
            - |
              set -e
              # The slim image ships without git, which pip needs for git+https installs.
              apt-get update
              apt-get install -y --no-install-recommends git
              rm -rf /var/lib/apt/lists/*
              pip install --no-cache-dir "cartsnitch-common @ git+https://github.com/cartsnitch/common.git@main"
              python -m cartsnitch_common.seed --database-url "$${DATABASE_URL}"
          env:
            - name: DATABASE_URL
              valueFrom:
                secretKeyRef:
                  name: cartsnitch-secrets
                  key: database-url-pg
                  optional: false
          resources:
            requests:
              cpu: 100m
              memory: 256Mi
            limits:
              cpu: 500m
              memory: 512Mi
-104
View File
@@ -1,104 +0,0 @@
#!/usr/bin/env bash
# =============================================================================
# seed-dev.sh — Run the CartSnitch seed runner against the dev database.
#
# Usage:
#   ./seed-dev.sh              Run full seed against dev
#   ./seed-dev.sh --dry-run    Show planned record counts without writing
#   ./seed-dev.sh --help       Show this help
#
# Any other arguments (for example --seed N) are passed through unchanged to
# the seed runner.
#
# Prerequisites:
#   - kubectl configured for the cartsnitch-dev cluster
#   - Namespace cartsnitch-dev exists (CNPG Postgres must be running)
#   - python able to run the cartsnitch_common.seed module
#
# What it does:
#   1. Reads the database password from the cartsnitch-pg-credentials secret
#   2. Starts a background port-forward to cartsnitch-pg-rw:5432
#   3. Waits until kubectl reports that the tunnel is listening
#   4. Runs python -m cartsnitch_common.seed with --database-url pointing
#      to localhost:<forwarded-port>/cartsnitch
#   5. Cleans up the port-forward on exit (normal, interrupt, or error)
# =============================================================================
set -euo pipefail

# --- Config -------------------------------------------------------------------
readonly NAMESPACE="cartsnitch-dev"
readonly SVC_NAME="cartsnitch-pg-rw"
readonly LOCAL_PORT="5433"  # use a non-privileged port to avoid conflicts
readonly DB_NAME="cartsnitch"
readonly PG_USER="cartsnitch"
readonly PG_SECRET="cartsnitch-pg-credentials"
readonly PF_TIMEOUT_SECS="15"  # how long to wait for the tunnel to come up

# --- Helpers ------------------------------------------------------------------
log()  { echo "[seed-dev] $*"; }
fail() { log "ERROR: $*" >&2; exit 1; }

# Print the help text. Kept as a heredoc so it does not depend on parsing this
# file's header comment with sed/head.
usage() {
  cat <<'EOF'
seed-dev.sh — Run the CartSnitch seed runner against the dev database.

Usage:
  ./seed-dev.sh              Run full seed against dev
  ./seed-dev.sh --dry-run    Show planned record counts without writing
  ./seed-dev.sh --help       Show this help

Prerequisites:
  - kubectl configured for the cartsnitch-dev cluster
  - Namespace cartsnitch-dev exists (CNPG Postgres must be running)

Additional arguments are passed through to the seed runner.
Common seed-runner options:
  --dry-run    Show planned record counts without writing
  --seed N     Set random seed (default: 42)
EOF
}

# Stop the port-forward and remove its log file. Runs on every exit path.
PF_PID=""
PF_LOG=""
cleanup() {
  if [[ -n "$PF_PID" ]]; then
    log "Stopping port-forward (PID $PF_PID)..."
    kill "$PF_PID" 2>/dev/null || true
    wait "$PF_PID" 2>/dev/null || true
  fi
  if [[ -n "$PF_LOG" ]]; then
    rm -f "$PF_LOG"
  fi
}
trap cleanup EXIT

# --- Args ---------------------------------------------------------------------
# Parsed before anything touches the cluster so --help works without kubectl.
# Everything except --help (including --dry-run) is forwarded to the seed runner.
SEED_ARGS=()
while [[ $# -gt 0 ]]; do
  case "$1" in
    --help) usage; exit 0 ;;
    *)      SEED_ARGS+=("$1"); shift ;;
  esac
done

# --- Prerequisites ------------------------------------------------------------
if ! command -v kubectl &>/dev/null; then
  fail "kubectl not found — must be installed and configured."
fi
if ! command -v python &>/dev/null; then
  fail "python not found — required to run cartsnitch_common.seed."
fi

# --- Credentials --------------------------------------------------------------
# Retrieve password from the CNPG credentials secret. The assignment is kept
# separate from `readonly` so a failing kubectl/base64 is not masked.
PG_PASSWORD="$(
  kubectl get secret "$PG_SECRET" \
    -n "$NAMESPACE" \
    -o jsonpath='{.data.password}' \
    | base64 -d
)" || fail "Could not read the password from secret ${PG_SECRET} in ${NAMESPACE}."
[[ -n "$PG_PASSWORD" ]] || fail "Secret ${PG_SECRET} has an empty password field."

# Percent-encode the password so characters such as '@', ':' or '/' cannot
# corrupt the connection URL. It is passed via the environment, not argv.
PG_PASSWORD_URLENC="$(
  PG_PASSWORD="$PG_PASSWORD" python -c \
    'import os, urllib.parse; print(urllib.parse.quote(os.environ["PG_PASSWORD"], safe=""))'
)" || fail "Could not URL-encode the database password."

readonly DB_URL="postgresql://${PG_USER}:${PG_PASSWORD_URLENC}@localhost:${LOCAL_PORT}/${DB_NAME}"

# --- Port-forward -------------------------------------------------------------
PF_LOG="$(mktemp)"
log "Starting port-forward ${SVC_NAME}:5432 -> localhost:${LOCAL_PORT} ..."
kubectl port-forward \
  -n "$NAMESPACE" \
  svc/"$SVC_NAME" \
  "${LOCAL_PORT}:5432" \
  >"$PF_LOG" 2>&1 &
PF_PID=$!

# Wait for kubectl to announce the listener instead of sleeping a fixed time;
# bail out early with kubectl's own output if the process dies.
tunnel_ready=""
for _ in $(seq 1 "$PF_TIMEOUT_SECS"); do
  if ! kill -0 "$PF_PID" 2>/dev/null; then
    fail "Port-forward failed to start: $(cat "$PF_LOG")"
  fi
  if grep -q 'Forwarding from' "$PF_LOG"; then
    tunnel_ready="1"
    break
  fi
  sleep 1
done
[[ -n "$tunnel_ready" ]] || fail "Timed out after ${PF_TIMEOUT_SECS}s waiting for the port-forward."

log "Port-forward active (PID $PF_PID) on localhost:${LOCAL_PORT}"

# --- Seed --------------------------------------------------------------------
# No `set -x` here: tracing would print DB_URL, and with it the password.
log "Running seed against dev database (postgresql://${PG_USER}:***@localhost:${LOCAL_PORT}/${DB_NAME})..."
# The ${arr[@]+...} form keeps `set -u` happy on older bash when no extra args were given.
python -m cartsnitch_common.seed --database-url "$DB_URL" ${SEED_ARGS[@]+"${SEED_ARGS[@]}"}

log "Done."
+31 -3
View File
@@ -1,8 +1,13 @@
import React, { Suspense } from 'react'
import { Link } from 'react-router-dom'
import { authClient } from '../lib/auth-client.ts'
import { usePurchases, usePriceAlerts } from '../hooks/useApi.ts'
import { usePurchases, usePriceAlerts, usePriceHistory } from '../hooks/useApi.ts'
import { StoreIcon } from '../components/StoreIcon.tsx'
// Load the sparkline chart module on demand. React.lazy expects a module with
// a default export, so the named SparklineCard export is re-wrapped as one.
const LazySparklineCard = React.lazy(async () => {
  const chartModule = await import('../components/SparklineChart.tsx')
  return { default: chartModule.SparklineCard }
})
export function Dashboard() {
const { data: session, isPending } = authClient.useSession()
@@ -39,11 +44,19 @@ export function Dashboard() {
function AuthenticatedDashboard({ userName }: { userName: string }) {
const { data: purchases = [], isLoading: purchasesLoading } = usePurchases()
const { data: alerts = [], isLoading: alertsLoading } = usePriceAlerts()
const { data: eggHistory = [] } = usePriceHistory('prod10')
const { data: milkHistory = [] } = usePriceHistory('prod1')
const triggeredAlerts = alerts.filter((a) => a.triggered)
const watchingAlerts = alerts.filter((a) => !a.triggered)
const recentPurchases = purchases.slice(0, 3)
const sparklineData = eggHistory.filter((p) => p.storeId === 'meijer').slice(-8)
const milkSparkline = milkHistory.filter((p) => p.storeId === 'kroger').slice(-8)
const eggCurrent = sparklineData.length > 0 ? `$${sparklineData[sparklineData.length - 1].price.toFixed(2)}` : '—'
const milkCurrent = milkSparkline.length > 0 ? `$${milkSparkline[milkSparkline.length - 1].price.toFixed(2)}` : '—'
if (purchasesLoading || alertsLoading) {
return <DashboardSkeleton />
}
@@ -93,8 +106,11 @@ function AuthenticatedDashboard({ userName }: { userName: string }) {
{/* Price trend sparklines */}
<section className="mt-6">
<h2 className="mb-3 text-lg font-semibold text-gray-700">Price Trends</h2>
<div className="rounded-xl bg-white p-4 shadow-sm text-center text-sm text-gray-400">
Connect a store to see price trends
<div className="space-y-3">
<Suspense fallback={<SparklinePlaceholder />}>
<LazySparklineCard label="Eggs (dozen)" data={sparklineData} current={eggCurrent} />
<LazySparklineCard label="Whole Milk (1 gal)" data={milkSparkline} current={milkCurrent} />
</Suspense>
</div>
</section>
@@ -171,3 +187,15 @@ function DashboardSkeleton() {
</div>
)
}
/** Pulsing skeleton card used as the Suspense fallback for the sparkline cards. */
function SparklinePlaceholder() {
  // Left column: two grey bars, one above the other.
  const textColumn = (
    <div className="min-w-0 flex-1">
      <div className="h-4 w-24 rounded bg-gray-200" />
      <div className="mt-2 h-6 w-16 rounded bg-gray-200" />
    </div>
  )
  // Right column: a lighter grey box.
  const chartBox = <div className="h-10 w-24 rounded bg-gray-100" />

  return (
    <div className="flex items-center gap-4 rounded-xl bg-white p-4 shadow-sm animate-pulse">
      {textColumn}
      {chartBox}
    </div>
  )
}
-45
View File
@@ -1,4 +1,3 @@
import { useState, useEffect } from 'react'
import { Link, useNavigate } from 'react-router-dom'
import { authClient } from '../lib/auth-client.ts'
import { useAuthStore } from '../stores/auth.ts'
@@ -10,26 +9,6 @@ export function Settings() {
const setAuthenticated = useAuthStore((s) => s.setAuthenticated)
const navigate = useNavigate()
const { theme, setTheme } = useThemeStore()
const [emailInAddress, setEmailInAddress] = useState<string | null>(null)
const [copied, setCopied] = useState(false)
useEffect(() => {
if (!session?.user) return
fetch('/api/v1/me/email-in-address', {
credentials: 'include',
})
.then((res) => res.json())
.then((data) => setEmailInAddress(data.email_address))
.catch(() => setEmailInAddress(null))
}, [session])
/**
 * Copy the receipt-forwarding address to the clipboard and show the "Copied!"
 * state for two seconds. Does nothing while the address is not yet loaded.
 */
async function handleCopyEmail() {
  if (!emailInAddress) return
  try {
    await navigator.clipboard.writeText(emailInAddress)
  } catch {
    // The clipboard write can be rejected (e.g. permission denied). Leave the
    // button label unchanged rather than surfacing an unhandled rejection
    // from the click handler.
    return
  }
  setCopied(true)
  setTimeout(() => setCopied(false), 2000)
}
const user = session?.user
const connectedStores: string[] = []
@@ -134,30 +113,6 @@ export function Settings() {
</button>
</div>
</section>
{/* Receipt Email section */}
<section className="mt-6">
<h2 className="mb-3 text-sm font-semibold text-gray-500">Receipt Email</h2>
<div className="rounded-xl bg-white p-4 shadow-sm">
<p className="mb-2 text-sm text-gray-600">
Forward your digital receipt emails to this address:
</p>
<div className="flex items-center gap-2">
<code className="flex-1 rounded-lg bg-gray-100 px-3 py-2 text-sm font-mono text-gray-800 truncate">
{emailInAddress ?? 'Loading...'}
</code>
<button
onClick={handleCopyEmail}
className="rounded-lg bg-brand-blue px-3 py-2 text-sm font-medium text-white hover:bg-brand-blue/90 transition-colors"
>
{copied ? 'Copied!' : 'Copy'}
</button>
</div>
<p className="mt-2 text-xs text-gray-400">
Supports Meijer, Kroger, and Target receipt emails.
</p>
</div>
</section>
</div>
)
}