Files
cartsnitch/api/alembic/versions/001_encrypt_session_data.py
T
Barcode Betty a54ea423ef fix(api): widen alembic_version.version_num in migration 001 (CAR-1302)
Alembic hardcodes alembic_version.version_num to VARCHAR(32) in
DefaultImpl.version_table_impl, and version_table_column_width is NOT a
real kwarg that context.configure() honors — it's silently ignored, so
the env.py change alone was never going to take effect on a fresh DB.

Our descriptive revision ids exceed 32 chars (e.g.
003_make_users_hashed_password_nullable = 39, common
002_add_normalized_products_upc_variants_index = 46), so the 003 / common
002 stamp fails with StringDataRightTruncation, the whole chain rolls back,
and the column is recreated at VARCHAR(32) on the next attempt.

Fix:
- api/alembic/versions/001_encrypt_session_data.py: insert ALTER TABLE
  alembic_version ALTER COLUMN version_num TYPE VARCHAR(128) as the very
  first statement of upgrade(), before any early-return path. Idempotent
  when the column is already wider (e.g. the CAR-1298 one-shot Job).
- common/alembic/versions/001_add_email_inbound_token.py: same defensive
  ALTER as the first statement of upgrade() (common is a library, not
  deployed, but the 46-char 002 id would have hit the same trap).
- api/alembic/env.py: remove the phantom version_table_column_width=128
  kwarg from both context.configure() call sites — it was a no-op and
  misled the original investigation.

No downgrade() changes: a matching narrowing could truncate.

Refs CAR-1302 (durable root fix), CAR-1298 (prod workaround this
replaces). Verified against a fresh PostgreSQL — all 9 api migrations
upgrade head with no StringDataRightTruncation, and common 001/002 stamp
the 46-char id cleanly. Cluster has pgcrypto enabled by the operator.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
2026-06-10 04:49:33 +00:00

113 lines
3.6 KiB
Python

"""Encrypt existing plaintext session_data with Fernet.
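
Also widens alembic_version.version_num to VARCHAR(128) as the first
statement of upgrade(), because later revision ids exceed 32 characters.

Revision ID: 001_encrypt_session_data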
Revises:
Create Date: 2026-03-19
"""
import json
import os
import sqlalchemy as sa
from cryptography.fernet import Fernet
from sqlalchemy import text
from alembic import op
# Revision identifiers, used by Alembic.
revision = "001_encrypt_session_data"
# No parent revision: this migration is the root of the chain.
down_revision = None
branch_labels = None
depends_on = None
def _get_fernet() -> Fernet:
    """Build the Fernet cipher from the ``CARTSNITCH_FERNET_KEY`` environment variable.

    Returns:
        A ``Fernet`` instance constructed from the encoded key string.

    Raises:
        RuntimeError: if ``CARTSNITCH_FERNET_KEY`` is unset or empty.
    """
    fernet_key = os.environ.get("CARTSNITCH_FERNET_KEY")
    if fernet_key:
        return Fernet(fernet_key.encode())
    raise RuntimeError("CARTSNITCH_FERNET_KEY must be set to run this migration")
def _is_fernet_token(value: str) -> bool:
"""Check if a string looks like a Fernet token (base64 starting with gAAAAA)."""
return value.startswith("gAAAAA")
def upgrade() -> None:
    """Widen ``alembic_version.version_num`` and encrypt plaintext ``session_data``.

    Steps, in order:

    1. Widen ``alembic_version.version_num`` to ``VARCHAR(128)``.
    2. Return early when ``user_store_accounts`` does not exist, has no
       ``session_data`` column, or that column is already TEXT.
    3. Resolve the Fernet key, convert ``session_data`` from JSON to TEXT, and
       replace every non-null value that does not already look like a Fernet
       token with its Fernet ciphertext.

    Raises:
        RuntimeError: if ``CARTSNITCH_FERNET_KEY`` is not set and the
            conversion path (step 3) is reached.
    """
    # Alembic hardcodes alembic_version.version_num to VARCHAR(32)
    # (DefaultImpl.version_table_impl) and exposes no option to widen it
    # (version_table_column_width is NOT a real kwarg — it is silently ignored).
    # Our descriptive revision ids exceed 32 chars (e.g.
    # 003_make_users_hashed_password_nullable = 39), so widen the column as the
    # very first migration statement, before any early-return path below.
    # Idempotent: a no-op when already wider (e.g. pre-created by the CAR-1298 Job).
    op.execute("ALTER TABLE alembic_version ALTER COLUMN version_num TYPE VARCHAR(128)")

    conn = op.get_bind()
    inspector = sa.inspect(conn)

    # Fresh DB — table created by Base.metadata.create_all with correct TEXT type
    if not inspector.has_table("user_store_accounts"):
        return

    # Already migrated? Skip if session_data is already TEXT (not JSON)
    cols = {c["name"]: c for c in inspector.get_columns("user_store_accounts")}
    if "session_data" not in cols:
        return
    col_type = str(cols["session_data"]["type"]).lower()
    if "text" in col_type and "json" not in col_type:
        return  # already TEXT — nothing to do

    # Resolve the key before touching user_store_accounts so that a missing or
    # invalid key fails here rather than after the column has been rewritten.
    f = _get_fernet()

    # Change column type from JSON to TEXT to hold Fernet ciphertext
    op.alter_column(
        "user_store_accounts",
        "session_data",
        type_=sa.Text(),
        existing_type=sa.JSON(),
        existing_nullable=True,
        postgresql_using="session_data::text",
    )

    rows = conn.execute(
        text("SELECT id, session_data FROM user_store_accounts WHERE session_data IS NOT NULL")
    ).fetchall()

    for row_id, session_data in rows:
        raw = str(session_data)
        if _is_fernet_token(raw):
            # Already encrypted; leave the stored value untouched.
            continue
        # Values that come back as non-strings are serialized to JSON text first.
        plaintext = raw if isinstance(session_data, str) else json.dumps(session_data)
        encrypted = f.encrypt(plaintext.encode()).decode()
        conn.execute(
            text("UPDATE user_store_accounts SET session_data = :data WHERE id = :id"),
            {"data": encrypted, "id": row_id},
        )
def downgrade() -> None:
    """Decrypt Fernet-encrypted ``session_data`` and revert the column to JSON.

    Mirrors the guards in ``upgrade()``: when ``user_store_accounts`` or its
    ``session_data`` column does not exist there is nothing to revert, so the
    function returns instead of failing on the SELECT.

    The widening of ``alembic_version.version_num`` performed by ``upgrade()``
    is not reverted here.

    Raises:
        RuntimeError: if ``CARTSNITCH_FERNET_KEY`` is not set.
    """
    conn = op.get_bind()
    inspector = sa.inspect(conn)

    # upgrade() is a no-op in these states, so downgrade() must be one as well.
    if not inspector.has_table("user_store_accounts"):
        return
    column_names = {c["name"] for c in inspector.get_columns("user_store_accounts")}
    if "session_data" not in column_names:
        return

    rows = conn.execute(
        text("SELECT id, session_data FROM user_store_accounts WHERE session_data IS NOT NULL")
    ).fetchall()

    f = _get_fernet()
    for row_id, session_data in rows:
        raw = str(session_data)
        if not _is_fernet_token(raw):
            # Not a Fernet token; leave the stored value as it is.
            continue
        decrypted = f.decrypt(raw.encode()).decode()
        conn.execute(
            text("UPDATE user_store_accounts SET session_data = :data WHERE id = :id"),
            {"data": decrypted, "id": row_id},
        )

    # Revert column type from TEXT back to JSON
    op.alter_column(
        "user_store_accounts",
        "session_data",
        type_=sa.JSON(),
        existing_type=sa.Text(),
        existing_nullable=True,
        postgresql_using="session_data::json",
    )