forked from cartsnitch/cartsnitch
Merge commit '4cf6f91e954b770198578bcb8db5d98ac964bfed' as 'common'
This commit is contained in:
@@ -0,0 +1 @@
|
||||
"""Deterministic seed data generator for CartSnitch dev environment."""
|
||||
@@ -0,0 +1,50 @@
|
||||
"""Entry point for `python -m cartsnitch_common.seed` and `cartsnitch-seed` CLI."""
|
||||
|
||||
import argparse
|
||||
import sys
|
||||
|
||||
from cartsnitch_common.seed.config import SEED_VALUE
|
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``cartsnitch-seed`` command."""
    cli = argparse.ArgumentParser(
        prog="cartsnitch-seed",
        description="Generate deterministic seed data for the CartSnitch dev environment.",
    )
    cli.add_argument(
        "--database-url",
        default=None,
        help=(
            "PostgreSQL connection URL (sync driver). "
            "Defaults to CARTSNITCH_DATABASE_URL_SYNC env var or built-in default."
        ),
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print planned record counts without writing to the database.",
    )
    cli.add_argument(
        "--seed",
        type=int,
        default=SEED_VALUE,
        help=f"Random seed for deterministic output (default: {SEED_VALUE}).",
    )
    return cli


def main() -> None:
    """Parse the command line and hand the options to ``run_seed``.

    Any ``Exception`` raised while importing or running the seeder is printed
    to stderr as ``ERROR: <message>`` and the process exits with status 1.
    """
    options = _build_parser().parse_args()

    try:
        # Imported inside the try block so that an import failure is reported
        # through the same ERROR path as a runtime failure.
        from cartsnitch_common.seed.runner import run_seed

        run_seed(
            database_url=options.database_url,
            seed_value=options.seed,
            dry_run=options.dry_run,
        )
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,38 @@
|
||||
"""Seed configuration constants."""
|
||||
|
||||
from datetime import date
|
||||
|
||||
# --- Randomness -------------------------------------------------------------
# Default seed used when the caller does not supply one.
SEED_VALUE: int = 42

# --- Date window ------------------------------------------------------------
# Six months of history ending on the fixed baseline date.
SEED_BASELINE_DATE: date = date(2026, 3, 21)
SEED_START_DATE: date = date(2025, 9, 21)
SEED_END_DATE: date = date(2026, 3, 21)

# --- Scale targets ----------------------------------------------------------
NUM_STORES: int = 3
NUM_LOCATIONS_PER_STORE: int = 5  # 3 stores * 5 = 15 locations
NUM_USERS: int = 500
NUM_ACTIVE_USERS: int = 50
NUM_USER_STORE_ACCOUNTS: int = 100
NUM_PRODUCTS: int = 500
NUM_PURCHASES: int = 5_000
NUM_PURCHASE_ITEMS: int = 25_000
NUM_PRICE_HISTORY: int = 50_000
NUM_COUPONS: int = 200
NUM_SHRINKFLATION_EVENTS: int = 20

# --- StickerShock inputs ----------------------------------------------------
# Number of products that should show a significant (>10%) price increase over
# the window; about 10% of the 500 products.
NUM_PRICE_INCREASE_PRODUCTS: int = 50

# --- Coupon mix -------------------------------------------------------------
# Fractions of NUM_COUPONS that are expired / still active.
COUPON_EXPIRED_PCT: float = 0.60
COUPON_ACTIVE_PCT: float = 0.40

# --- Derived averages -------------------------------------------------------
# 25K purchase items spread over 5K purchases.
AVG_ITEMS_PER_PURCHASE: int = 5

# 50K price observations spread over 500 products.
PRICE_OBS_PER_PRODUCT: int = 100
|
||||
@@ -0,0 +1 @@
|
||||
"""Seed data generators."""
|
||||
@@ -0,0 +1,107 @@
|
||||
"""Generate Coupon seed data."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
from faker import Faker
|
||||
|
||||
from cartsnitch_common.constants import DiscountType
|
||||
from cartsnitch_common.seed.config import (
|
||||
COUPON_EXPIRED_PCT,
|
||||
NUM_COUPONS,
|
||||
SEED_END_DATE,
|
||||
SEED_START_DATE,
|
||||
)
|
||||
|
||||
|
||||
def _decimal(val: float) -> Decimal:
    """Round *val* to two places and return it as a ``Decimal``."""
    rounded = round(val, 2)
    # Built from the string form so the Decimal carries the short repr of the
    # rounded number rather than the float's full binary expansion.
    return Decimal(str(rounded))
|
||||
|
||||
|
||||
# Title templates with ``{val}`` and ``{product}`` placeholders.
# NOTE(review): generate_coupons builds its titles inline and does not appear
# to read this list; confirm whether it is still needed.
_COUPON_TITLES: list[str] = [
    "Save {val} on {product}",
    "{val} off your next {product} purchase",
    "Get {val} off {product}",
    "Buy {product}, save {val}",
    "Weekend special: {val} off {product}",
    "Member exclusive: {val} off {product}",
    "Digital coupon: {val} off {product}",
]
|
||||
|
||||
|
||||
def generate_coupons(
    fake: Faker,
    products: list[dict],
    stores: list[dict],
) -> list[dict]:
    """Return NUM_COUPONS coupon records, shuffled, with a fixed expired/active split.

    ``int(NUM_COUPONS * COUPON_EXPIRED_PCT)`` coupons have a validity window
    that ended before SEED_END_DATE; the remainder are valid on that date.

    Args:
        fake: Faker instance used for coupon codes and descriptions.
        products: Product dicts; ``id`` and ``canonical_name`` are read, plus
            ``brand`` when present.
        stores: Store dicts; only ``id`` is read.

    Returns:
        The coupon dicts in shuffled order.
    """
    now = datetime.now(tz=UTC)
    today = SEED_END_DATE

    num_expired = int(NUM_COUPONS * COUPON_EXPIRED_PCT)
    num_active = NUM_COUPONS - num_expired

    def product_label(product: dict | None) -> str:
        """Return the product text used in a title, without the brand prefix."""
        if product is None:
            return "any item"
        canonical = product["canonical_name"]
        brand = product.get("brand")
        # Strip the actual brand when it is known: brands are one to three
        # words long, so dropping a fixed number of words cuts into the name.
        if brand and canonical.startswith(f"{brand} "):
            return canonical[len(brand) + 1 :]
        return canonical.split(" ", 2)[-1]

    def make_coupon(is_active: bool) -> dict:
        store = random.choice(stores)
        # About one coupon in ten is not tied to a specific product.
        product = random.choice(products) if random.random() > 0.1 else None
        product_name = product_label(product)

        discount_type = random.choice(list(DiscountType))

        discount_value: Decimal | None
        if discount_type == DiscountType.PERCENT:
            discount_value = _decimal(random.choice([5, 10, 15, 20, 25, 30]))
            title = f"Save {int(discount_value)}% on {product_name}"
        elif discount_type == DiscountType.FIXED:
            discount_value = _decimal(random.choice([0.50, 1.00, 1.50, 2.00, 2.50, 3.00, 5.00]))
            # Always show two decimals: "$0.50", not "$0.5".
            title = f"Save ${discount_value:.2f} on {product_name}"
        elif discount_type == DiscountType.BOGO:
            discount_value = None
            title = f"BOGO: Buy one {product_name}, get one free"
        else:  # BUY_X_GET_Y
            discount_value = None
            title = f"Buy 2 {product_name}, get 1 free"

        if is_active:
            valid_from = today - timedelta(days=random.randint(1, 30))
            valid_to = today + timedelta(days=random.randint(1, 60))
        else:
            valid_to = today - timedelta(days=random.randint(1, 180))
            valid_from = valid_to - timedelta(days=random.randint(7, 30))

        requires_clip = random.random() > 0.5
        # Only coupons that do not need clipping carry a code.
        coupon_code = fake.bothify(text="??##-??##").upper() if not requires_clip else None
        # A zero minimum is falsy as a Decimal, so it is stored as None.
        min_purchase = _decimal(random.choice([0, 0, 0, 5.00, 10.00, 15.00])) or None

        scraped_at = datetime(
            SEED_START_DATE.year, SEED_START_DATE.month, SEED_START_DATE.day, tzinfo=UTC
        ) + timedelta(days=random.randint(0, 180))

        return {
            "id": uuid.uuid4(),
            "store_id": store["id"],
            "normalized_product_id": product["id"] if product else None,
            "title": title,
            "description": fake.sentence(nb_words=10),
            "discount_type": discount_type,
            "discount_value": discount_value,
            "min_purchase": min_purchase,
            "valid_from": valid_from,
            "valid_to": valid_to,
            "requires_clip": requires_clip,
            "coupon_code": coupon_code,
            "source_url": None,
            "scraped_at": scraped_at,
            "created_at": now,
            "updated_at": now,
        }

    # Expired coupons are generated first, then active ones; the final shuffle
    # removes that ordering from the returned list.
    coupons = [make_coupon(is_active=False) for _ in range(num_expired)]
    coupons.extend(make_coupon(is_active=True) for _ in range(num_active))

    random.shuffle(coupons)
    return coupons
|
||||
@@ -0,0 +1,162 @@
|
||||
"""Generate PriceHistory seed data with realistic patterns for StickerShock detection."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.constants import PriceSource
|
||||
from cartsnitch_common.seed.config import (
|
||||
NUM_PRICE_HISTORY,
|
||||
NUM_PRICE_INCREASE_PRODUCTS,
|
||||
SEED_END_DATE,
|
||||
SEED_START_DATE,
|
||||
)
|
||||
|
||||
# Number of days between SEED_START_DATE and SEED_END_DATE.
_DATE_RANGE_DAYS = (SEED_END_DATE - SEED_START_DATE).days
|
||||
|
||||
# Approximate holiday sale windows inside the seed date range, as
# (first day, last day) pairs; _is_sale_period treats both ends as inclusive.
_SALE_PERIODS: list[tuple[date, date]] = [
    # Thanksgiving / Black Friday
    (date(2025, 11, 27), date(2025, 11, 30)),
    # Christmas
    (date(2025, 12, 20), date(2025, 12, 26)),
    # New Year
    (date(2026, 1, 1), date(2026, 1, 2)),
    # Valentine's Day
    (date(2026, 2, 14), date(2026, 2, 15)),
]
|
||||
|
||||
|
||||
def _is_sale_period(d: date) -> bool:
    """Return True when *d* falls inside any window listed in ``_SALE_PERIODS``."""
    for window_start, window_end in _SALE_PERIODS:
        if window_start <= d <= window_end:
            return True
    return False
|
||||
|
||||
|
||||
def _decimal(val: float) -> Decimal:
    """Return *val* rounded to two decimal places as a ``Decimal``."""
    two_places = round(val, 2)
    # The string form is used so the Decimal does not inherit the float's
    # binary representation error.
    return Decimal(str(two_places))
|
||||
|
||||
|
||||
def _base_price_for_product(product: dict) -> float:
    """Draw a base price for *product* from a range chosen by its category.

    A product with no ``category`` key, a ``None`` category, or a category
    missing from the table is priced from the fallback range (1.99-9.99).
    """
    from cartsnitch_common.constants import ProductCategory

    fallback_range = (1.99, 9.99)
    ranges_by_category: dict[ProductCategory, tuple[float, float]] = {
        ProductCategory.PRODUCE: (1.49, 6.99),
        ProductCategory.DAIRY: (2.99, 8.99),
        ProductCategory.MEAT: (4.99, 19.99),
        ProductCategory.BAKERY: (2.49, 7.99),
        ProductCategory.FROZEN: (3.99, 12.99),
        ProductCategory.PANTRY: (1.99, 9.99),
        ProductCategory.BEVERAGES: (0.99, 6.99),
        ProductCategory.SNACKS: (2.49, 6.99),
        ProductCategory.HOUSEHOLD: (3.99, 19.99),
        ProductCategory.PERSONAL_CARE: (3.99, 14.99),
    }

    category: ProductCategory | None = product.get("category")
    if category is None:
        low, high = fallback_range
    else:
        low, high = ranges_by_category.get(category, fallback_range)
    return random.uniform(low, high)
|
||||
|
||||
|
||||
def generate_price_history(
    products: list[dict],
    stores: list[dict],
    purchase_items: list[dict],
) -> list[dict]:
    """Return up to NUM_PRICE_HISTORY price history records.

    Every product is assigned one price pattern, then observed on randomly
    chosen days at every store:

    - sudden_jump: flat, then a 10-25% step up 50-80% of the way through
    - gradual_creep: rises linearly to about +12% by the end of the window
    - stable: base price with small noise
    - volatile: base price scaled by a random factor between 0.85 and 1.15
    - sale_driven: base price with small noise

    Up to NUM_PRICE_INCREASE_PRODUCTS randomly chosen products are restricted
    to sudden_jump or gradual_creep so StickerShock has increases to flag.
    Whatever the pattern, an observation dated inside a holiday sale window
    also gets a ``sale_price`` 10-25% below the regular price.

    Args:
        products: Product dicts; ``id`` and ``category`` are read.
        stores: Store dicts; only ``id`` is read.
        purchase_items: Kept for interface compatibility. It is not read:
            every record's ``purchase_item_id`` is None.

    Returns:
        The generated records, or an empty list when there are no products or
        no stores.
    """
    # Nothing can be observed without a product and a store; returning early
    # also avoids a division by zero when sizing the per-series sample.
    if not products or not stores:
        return []

    now = datetime.now(tz=UTC)
    records: list[dict] = []

    # Observations per (product, store) series, at least one and never more
    # than there are distinct days in the window.
    per_product_per_store = max(NUM_PRICE_HISTORY // (len(products) * len(stores)), 1)
    obs_per_series = min(per_product_per_store, _DATE_RANGE_DAYS + 1)

    # Cap the sample size so a small product list does not make
    # random.sample raise ValueError.
    increase_count = min(NUM_PRICE_INCREASE_PRODUCTS, len(products))
    price_increase_indices = set(random.sample(range(len(products)), increase_count))
    pattern_pool = ["sale_driven", "stable", "gradual_creep", "volatile"]
    product_patterns: list[str] = [
        random.choice(["sudden_jump", "gradual_creep"])
        if i in price_increase_indices
        else random.choice(pattern_pool)
        for i in range(len(products))
    ]

    for product, pattern in zip(products, product_patterns):
        base_price = _base_price_for_product(product)

        # Drawn for every product, even though only sudden_jump uses them, so
        # the sequence of random draws does not depend on the pattern.
        jump_day = int(_DATE_RANGE_DAYS * random.uniform(0.5, 0.8))
        jump_factor = random.uniform(1.10, 1.25)  # 10-25% increase

        for store in stores:
            obs_days = sorted(random.sample(range(_DATE_RANGE_DAYS + 1), obs_per_series))

            for day_offset in obs_days:
                obs_date = SEED_START_DATE + timedelta(days=day_offset)
                progress = day_offset / max(_DATE_RANGE_DAYS, 1)

                if pattern == "sudden_jump":
                    if day_offset < jump_day:
                        price = base_price + random.uniform(-0.05, 0.05)
                    else:
                        price = base_price * jump_factor + random.uniform(-0.05, 0.05)
                elif pattern == "gradual_creep":
                    price = base_price * (1 + 0.12 * progress) + random.uniform(-0.10, 0.10)
                elif pattern == "stable":
                    price = base_price + random.uniform(-0.10, 0.10)
                elif pattern == "volatile":
                    price = base_price * random.uniform(0.85, 1.15)
                else:  # sale_driven: flat regular price, discounts come below
                    price = base_price + random.uniform(-0.05, 0.05)

                price = max(0.99, price)
                regular_price = _decimal(price)

                sale_price: Decimal | None = None
                if _is_sale_period(obs_date):
                    sale_price = _decimal(price * random.uniform(0.75, 0.90))

                records.append(
                    {
                        "id": uuid.uuid4(),
                        "normalized_product_id": product["id"],
                        "store_id": store["id"],
                        "observed_date": obs_date,
                        "regular_price": regular_price,
                        "sale_price": sale_price,
                        "loyalty_price": None,
                        "coupon_price": None,
                        # About 70% receipt-sourced, 30% catalog-sourced.
                        "source": (
                            PriceSource.RECEIPT if random.random() > 0.3 else PriceSource.CATALOG
                        ),
                        "purchase_item_id": None,
                        "created_at": now,
                        "updated_at": now,
                    }
                )

                # Hard cap on the total number of records.
                if len(records) >= NUM_PRICE_HISTORY:
                    return records

    return records
|
||||
@@ -0,0 +1,253 @@
|
||||
"""Generate NormalizedProduct seed data."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from faker import Faker
|
||||
|
||||
from cartsnitch_common.constants import ProductCategory, SizeUnit
|
||||
from cartsnitch_common.seed.config import NUM_PRODUCTS
|
||||
|
||||
# One template per category: (category, brands, product names, size strings,
# default size unit). generate_products draws a brand, a name and a size
# string from the matching lists.
_PRODUCT_TEMPLATES: list[tuple[ProductCategory, list[str], list[str], list[str], SizeUnit]] = [
    (
        ProductCategory.PRODUCE,
        ["Organic Valley", "Earthbound Farm", "Local Farm", "Fresh Farms"],
        [
            "Bananas", "Apples", "Baby Carrots", "Spinach", "Broccoli",
            "Strawberries", "Blueberries", "Grapes", "Tomatoes", "Lettuce",
        ],
        ["1 lb", "2 lb", "16 oz", "12 oz", "5 oz", "6 oz", "32 oz"],
        SizeUnit.LB,
    ),
    (
        ProductCategory.DAIRY,
        ["Kraft", "Tillamook", "Great Value", "Land O'Lakes", "Daisy", "Organic Valley"],
        [
            "Whole Milk", "2% Milk", "Cheddar Cheese", "Mozzarella", "Greek Yogurt",
            "Butter", "Cream Cheese", "Sour Cream", "Heavy Cream", "Cottage Cheese",
        ],
        ["16 oz", "32 oz", "64 oz", "1 gallon", "8 oz", "12 oz", "5 oz"],
        SizeUnit.FL_OZ,
    ),
    (
        ProductCategory.MEAT,
        ["Tyson", "Perdue", "Smithfield", "Oscar Mayer", "Applegate", "Kirkland"],
        [
            "Chicken Breast", "Ground Beef", "Pork Chops", "Bacon", "Turkey",
            "Salmon", "Tilapia", "Sausage", "Hot Dogs", "Deli Ham",
        ],
        ["1 lb", "2 lb", "3 lb", "12 oz", "16 oz", "24 oz"],
        SizeUnit.LB,
    ),
    (
        ProductCategory.BAKERY,
        ["Nature's Own", "Dave's Killer Bread", "Pepperidge Farm", "Sara Lee", "Arnold"],
        [
            "White Bread", "Whole Wheat Bread", "Sourdough", "Bagels", "English Muffins",
            "Croissants", "Dinner Rolls", "Hamburger Buns", "Hot Dog Buns", "Muffins",
        ],
        ["20 oz", "24 oz", "6 ct", "8 ct", "12 ct", "16 oz"],
        SizeUnit.OZ,
    ),
    (
        ProductCategory.FROZEN,
        ["Stouffer's", "Amy's", "Birds Eye", "Green Giant", "Totino's", "DiGiorno"],
        [
            "Frozen Pizza", "Mac and Cheese", "Frozen Burritos", "Chicken Nuggets",
            "Fish Sticks", "Frozen Vegetables", "Ice Cream", "Frozen Waffles",
            "Tater Tots", "Frozen Lasagna",
        ],
        ["12 oz", "16 oz", "24 oz", "32 oz", "4 ct", "8 ct"],
        SizeUnit.OZ,
    ),
    (
        ProductCategory.PANTRY,
        ["Campbell's", "Hunt's", "Kraft", "Heinz", "Del Monte", "General Mills", "Kellogg's"],
        [
            "Pasta Sauce", "Canned Tomatoes", "Chicken Noodle Soup", "Peanut Butter",
            "Jelly", "Olive Oil", "Rice", "Pasta", "Oatmeal", "Cereal",
        ],
        ["15 oz", "24 oz", "32 oz", "18 oz", "16 oz", "24 oz", "48 oz", "64 oz"],
        SizeUnit.OZ,
    ),
    (
        ProductCategory.BEVERAGES,
        ["Coca-Cola", "Pepsi", "Tropicana", "Minute Maid", "Gatorade", "LaCroix", "Nestle"],
        [
            "Cola", "Diet Cola", "Orange Juice", "Apple Juice", "Sports Drink",
            "Sparkling Water", "Iced Coffee", "Energy Drink", "Lemonade", "Green Tea",
        ],
        ["12 fl oz", "20 fl oz", "32 fl oz", "64 fl oz", "2 liter", "6 pk", "12 pk"],
        SizeUnit.FL_OZ,
    ),
    (
        ProductCategory.SNACKS,
        ["Frito-Lay", "Nabisco", "Kellogg's", "Pepperidge Farm", "Clif Bar", "KIND", "Planters"],
        [
            "Potato Chips", "Tortilla Chips", "Pretzels", "Crackers", "Granola Bars",
            "Trail Mix", "Popcorn", "Cookies", "Nuts", "Fruit Snacks",
        ],
        ["7 oz", "10 oz", "16 oz", "6 ct", "12 ct", "18 ct", "3.5 oz"],
        SizeUnit.OZ,
    ),
    (
        ProductCategory.HOUSEHOLD,
        ["Tide", "Dawn", "Bounty", "Charmin", "Clorox", "Method", "Seventh Generation"],
        [
            "Laundry Detergent", "Dish Soap", "Paper Towels", "Toilet Paper", "Bleach",
            "All-Purpose Cleaner", "Fabric Softener", "Dryer Sheets", "Trash Bags", "Sponges",
        ],
        ["32 oz", "64 oz", "100 oz", "6 pk", "12 pk", "24 ct", "2 pk"],
        SizeUnit.OZ,
    ),
    (
        ProductCategory.PERSONAL_CARE,
        ["Dove", "Pantene", "Colgate", "Crest", "Gillette", "L'Oreal", "Neutrogena"],
        [
            "Shampoo", "Conditioner", "Body Wash", "Toothpaste", "Deodorant",
            "Face Wash", "Lotion", "Razor", "Shaving Cream", "Hand Soap",
        ],
        ["12 oz", "24 oz", "32 oz", "3.4 oz", "6 oz", "8 oz", "2 pk"],
        SizeUnit.OZ,
    ),
]
|
||||
|
||||
|
||||
def _generate_upc() -> str:
    """Return a fake 12-digit UPC: 11 random digits followed by a check digit."""
    payload = [random.randint(0, 9) for _ in range(11)]
    # Digits at even indexes (1st, 3rd, ... position) carry a weight of 3.
    weighted_total = 3 * sum(payload[0::2]) + sum(payload[1::2])
    check_digit = (10 - weighted_total % 10) % 10
    return "".join(map(str, [*payload, check_digit]))
|
||||
|
||||
|
||||
def generate_products(fake: Faker) -> list[dict]:
    """Return NUM_PRODUCTS normalized product records.

    Products are split as evenly as possible across the categories in
    ``_PRODUCT_TEMPLATES``; the first ``NUM_PRODUCTS % len(templates)``
    categories receive one extra product. Each product gets a random brand,
    name and size from its category template and one to three UPCs.

    Args:
        fake: Accepted for a uniform generator signature; not used here.

    Returns:
        The product dicts, grouped by category in template order.
    """
    now = datetime.now(tz=UTC)
    products: list[dict] = []
    used_upcs: set[str] = set()

    # Unit text as it appears in the size strings -> enum member, so that
    # "16 oz" produce is stored as OZ rather than the category default (LB).
    # TODO(review): "gallon", "liter" and "pk" have no mapping here and still
    # fall back to the category default; add them once the matching SizeUnit
    # members are confirmed.
    unit_by_text = {
        "lb": SizeUnit.LB,
        "oz": SizeUnit.OZ,
        "fl oz": SizeUnit.FL_OZ,
        "ct": SizeUnit.CT,
    }

    per_category, remainder = divmod(NUM_PRODUCTS, len(_PRODUCT_TEMPLATES))

    for i, (category, brands, names, sizes, default_unit) in enumerate(_PRODUCT_TEMPLATES):
        count = per_category + (1 if i < remainder else 0)
        for _ in range(count):
            brand = random.choice(brands)
            product_name = random.choice(names)
            size_str = random.choice(sizes)
            canonical_name = f"{brand} {product_name} {size_str}"

            # "12 fl oz" -> ("12", "fl oz")
            size_val, _sep, unit_text = size_str.partition(" ")
            size_unit = unit_by_text.get(unit_text, default_unit)

            num_upcs = random.randint(1, 3)
            upcs: list[str] = []
            for _ in range(num_upcs):
                upc = _generate_upc()
                # Re-draw on collision, giving up after 10 extra attempts.
                attempts = 0
                while upc in used_upcs and attempts < 10:
                    upc = _generate_upc()
                    attempts += 1
                used_upcs.add(upc)
                upcs.append(upc)

            products.append(
                {
                    "id": uuid.uuid4(),
                    "canonical_name": canonical_name,
                    "category": category,
                    "subcategory": product_name,
                    "brand": brand,
                    "size": size_val,
                    "size_unit": size_unit,
                    "upc_variants": upcs,
                    "created_at": now,
                    "updated_at": now,
                }
            )

    return products
|
||||
@@ -0,0 +1,156 @@
|
||||
"""Generate Purchase and PurchaseItem seed data."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.seed.config import (
|
||||
NUM_PURCHASE_ITEMS,
|
||||
NUM_PURCHASES,
|
||||
SEED_END_DATE,
|
||||
SEED_START_DATE,
|
||||
)
|
||||
|
||||
# Number of days between SEED_START_DATE and SEED_END_DATE.
_DATE_RANGE_DAYS = (SEED_END_DATE - SEED_START_DATE).days
|
||||
|
||||
|
||||
def _random_date() -> date:
    """Pick a random date from SEED_START_DATE through SEED_END_DATE, inclusive."""
    offset_days = random.randint(0, _DATE_RANGE_DAYS)
    return SEED_START_DATE + timedelta(days=offset_days)
|
||||
|
||||
|
||||
def _decimal(val: float, places: int = 2) -> Decimal:
    """Round *val* to *places* decimal places and return it as a ``Decimal``."""
    rounded = round(val, places)
    # Built from the string form so the Decimal carries the short repr of the
    # rounded number rather than the float's full binary expansion.
    return Decimal(str(rounded))
|
||||
|
||||
|
||||
def generate_purchases(
    users: list[dict],
    stores: list[dict],
    store_locations: list[dict],
) -> list[dict]:
    """Return NUM_PURCHASES purchase records.

    80% of the purchases go to users whose ``_active`` flag is true and the
    rest to the other users. If one of those groups is empty, its share is
    drawn from the other group instead.

    Args:
        users: User dicts; ``id`` and ``_active`` are read.
        stores: Store dicts; only ``id`` is read.
        store_locations: Location dicts; ``id`` and ``store_id`` are read. A
            purchase at a store with no locations gets a None location id.

    Returns:
        The purchase dicts, active-user purchases first. The list is empty
        when there are no users or no stores.
    """
    # Without a user and a store there is nothing to attach a purchase to.
    if not users or not stores:
        return []

    now = datetime.now(tz=UTC)
    active_users = [u for u in users if u["_active"]]
    inactive_users = [u for u in users if not u["_active"]]
    # Fall back to the other group rather than failing on an empty one.
    active_pool = active_users or inactive_users
    inactive_pool = inactive_users or active_users

    # Locations grouped by the store they belong to.
    locs_by_store: dict = {}
    for loc in store_locations:
        locs_by_store.setdefault(loc["store_id"], []).append(loc)

    purchases: list[dict] = []
    seen_receipts: set[tuple] = set()

    active_count = int(NUM_PURCHASES * 0.8)
    inactive_count = NUM_PURCHASES - active_count

    def make_purchase(user: dict, store: dict) -> dict:
        # Re-draw the receipt id until it is unique for this user and store.
        # A collision used to drop the purchase, leaving the result short of
        # NUM_PURCHASES.
        while True:
            receipt_id = f"RCT-{random.randint(100000, 999999)}"
            key = (user["id"], store["id"], receipt_id)
            if key not in seen_receipts:
                break
        seen_receipts.add(key)

        subtotal = _decimal(random.uniform(5.0, 150.0))
        tax = _decimal(float(subtotal) * 0.06)  # flat 6% of the subtotal
        savings = _decimal(random.uniform(0.0, float(subtotal) * 0.3))
        total = _decimal(float(subtotal) + float(tax) - float(savings))
        purchase_date = _random_date()
        store_locs = locs_by_store.get(store["id"], [])
        store_location_id = random.choice(store_locs)["id"] if store_locs else None
        # Ingestion happens 1-48 hours after midnight UTC of the purchase date.
        ingested_at = datetime(
            purchase_date.year, purchase_date.month, purchase_date.day, tzinfo=UTC
        ) + timedelta(hours=random.randint(1, 48))
        return {
            "id": uuid.uuid4(),
            "user_id": user["id"],
            "store_id": store["id"],
            "store_location_id": store_location_id,
            "receipt_id": receipt_id,
            "purchase_date": purchase_date,
            "total": total,
            "subtotal": subtotal,
            "tax": tax,
            "savings_total": savings if float(savings) > 0 else None,
            "source_url": None,
            "raw_data": None,
            "ingested_at": ingested_at,
            "created_at": now,
            "updated_at": now,
        }

    for pool, count in ((active_pool, active_count), (inactive_pool, inactive_count)):
        for _ in range(count):
            user = random.choice(pool)
            store = random.choice(stores)
            purchases.append(make_purchase(user, store))

    return purchases
|
||||
|
||||
|
||||
def generate_purchase_items(
    purchases: list[dict],
    products: list[dict],
) -> list[dict]:
    """Return about NUM_PURCHASE_ITEMS purchase item records spread over *purchases*.

    Each purchase receives between 1 and 15 items. The count is drawn around
    the running average still needed to reach the target, and generation stops
    as soon as the target is met.

    Args:
        purchases: Purchase dicts; only ``id`` is read.
        products: Product dicts; ``id``, ``canonical_name``, ``category`` and
            ``upc_variants`` are read.

    Returns:
        The item dicts in purchase order.
    """
    now = datetime.now(tz=UTC)
    items: list[dict] = []
    purchase_total = len(purchases)

    for index, purchase in enumerate(purchases):
        purchases_left = purchase_total - index
        items_left = NUM_PURCHASE_ITEMS - len(items)
        if purchases_left <= 0 or items_left <= 0:
            break

        # Aim for the average the remaining purchases need, with some spread.
        mean_items = items_left / purchases_left
        drawn = int(random.gauss(mean_items, 2))
        line_count = min(max(1, min(15, drawn)), items_left)

        for _ in range(line_count):
            product = random.choice(products)
            unit_price = _decimal(random.uniform(0.99, 25.99))
            quantity = Decimal("1.000")
            extended_price = _decimal(float(unit_price) * float(quantity))

            # About 30% of items carry a sale price 5-30% below the unit price.
            sale_price = None
            if random.random() > 0.7:
                sale_price = _decimal(float(unit_price) * random.uniform(0.7, 0.95))

            # About 15% of items carry a coupon discount.
            coupon_discount = None
            if random.random() > 0.85:
                coupon_discount = _decimal(random.uniform(0.25, 2.00))

            upc_variants = product["upc_variants"]
            upc = random.choice(upc_variants) if upc_variants else None

            category = product["category"]
            items.append(
                {
                    "id": uuid.uuid4(),
                    "purchase_id": purchase["id"],
                    "product_name_raw": product["canonical_name"],
                    "upc": upc,
                    "quantity": quantity,
                    "unit_price": unit_price,
                    "extended_price": extended_price,
                    "regular_price": unit_price,
                    "sale_price": sale_price,
                    "coupon_discount": coupon_discount,
                    "loyalty_discount": None,
                    "category_raw": category.value if category else None,
                    "normalized_product_id": product["id"],
                    "created_at": now,
                    "updated_at": now,
                }
            )

    return items
|
||||
@@ -0,0 +1,114 @@
|
||||
"""Generate ShrinkflationEvent seed data."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.constants import SizeUnit
|
||||
from cartsnitch_common.seed.config import (
|
||||
NUM_SHRINKFLATION_EVENTS,
|
||||
SEED_END_DATE,
|
||||
SEED_START_DATE,
|
||||
)
|
||||
|
||||
# Number of days between SEED_START_DATE and SEED_END_DATE.
_DATE_RANGE_DAYS = (SEED_END_DATE - SEED_START_DATE).days
|
||||
|
||||
# Size-change templates: (old size, new size, unit, fractional size reduction).
# Sizes are strings; the last field is (old - new) / old rounded to three places.
_SHRINK_PATTERNS: list[tuple[str, str, SizeUnit, float]] = [
    # weight, ounces
    ("16", "14", SizeUnit.OZ, 0.125),
    ("32", "28", SizeUnit.OZ, 0.125),
    ("64", "56", SizeUnit.FL_OZ, 0.125),
    ("18", "16", SizeUnit.OZ, 0.111),
    ("20", "18", SizeUnit.OZ, 0.10),
    ("2", "1.75", SizeUnit.LB, 0.125),
    ("24", "21", SizeUnit.OZ, 0.125),
    ("12", "10.5", SizeUnit.OZ, 0.125),
    ("48", "42", SizeUnit.OZ, 0.125),
    ("8", "7", SizeUnit.OZ, 0.125),
    ("1", "0.875", SizeUnit.LB, 0.125),
    ("36", "32", SizeUnit.OZ, 0.111),
    # counts
    ("6", "5", SizeUnit.CT, 0.167),
    ("12", "10", SizeUnit.CT, 0.167),
    ("100", "90", SizeUnit.CT, 0.10),
    # mixed
    ("16.9", "15", SizeUnit.FL_OZ, 0.112),
    ("3", "2.5", SizeUnit.LB, 0.167),
    ("40", "35", SizeUnit.OZ, 0.125),
    ("28", "24", SizeUnit.OZ, 0.143),
    ("14.5", "12.5", SizeUnit.OZ, 0.138),
]
|
||||
|
||||
|
||||
def _decimal(val: float) -> Decimal:
    """Return *val* as a ``Decimal`` rounded to two decimal places."""
    as_text = str(round(val, 2))
    # Parsing the text form avoids carrying over the float's binary expansion.
    return Decimal(as_text)
|
||||
|
||||
|
||||
def generate_shrinkflation_events(products: list[dict]) -> list[dict]:
    """Return up to NUM_SHRINKFLATION_EVENTS shrinkflation event records.

    One event is created per selected product. Products in the preferred
    categories are chosen first; other products are used only when there are
    not enough preferred ones. Each event pairs a size change from
    ``_SHRINK_PATTERNS`` with a price that stays within about -2% to +8% of
    the old price, which is the kind of input ShrinkRay is meant to flag.

    Args:
        products: Product dicts; ``id`` and ``category`` are read.

    Returns:
        The event dicts; fewer than NUM_SHRINKFLATION_EVENTS when there are
        fewer products than that.
    """
    from cartsnitch_common.constants import ProductCategory

    now = datetime.now(tz=UTC)
    events: list[dict] = []

    preferred_cats = {
        ProductCategory.PANTRY,
        ProductCategory.SNACKS,
        ProductCategory.HOUSEHOLD,
        ProductCategory.PERSONAL_CARE,
        ProductCategory.FROZEN,
        ProductCategory.DAIRY,
        ProductCategory.BEVERAGES,
    }

    # Single-pass partition; testing each dict for membership in a list of
    # dicts would be quadratic.
    preferred: list[dict] = []
    fallback: list[dict] = []
    for candidate in products:
        bucket = preferred if candidate.get("category") in preferred_cats else fallback
        bucket.append(candidate)

    # Draw from the preferred products first and top up from the rest only if
    # needed. Sampling the combined list would ignore the preference.
    wanted = min(NUM_SHRINKFLATION_EVENTS, len(products))
    selected = random.sample(preferred, min(wanted, len(preferred)))
    if len(selected) < wanted:
        selected += random.sample(fallback, wanted - len(selected))

    # Detect at least 60 days into the window so there is history beforehand,
    # clamped so a window shorter than 60 days still yields a valid range.
    min_day = min(60, _DATE_RANGE_DAYS)

    for i, product in enumerate(selected):
        # Patterns are assigned round-robin, independent of the product's own size.
        old_size, new_size, unit, reduction_pct = _SHRINK_PATTERNS[i % len(_SHRINK_PATTERNS)]

        detected_day = random.randint(min_day, _DATE_RANGE_DAYS)
        detected_date = SEED_START_DATE + timedelta(days=detected_day)

        base_price = random.uniform(2.99, 12.99)
        price_at_old_size = _decimal(base_price)
        # Roughly flat to a small increase despite the smaller package.
        price_at_new_size = _decimal(base_price * random.uniform(0.98, 1.08))

        confidence = _decimal(random.uniform(0.70, 0.99))

        notes = (
            f"Package reduced from {old_size}{unit} to {new_size}{unit} "
            f"({reduction_pct * 100:.1f}% reduction). "
            f"Price {'increased' if price_at_new_size > price_at_old_size else 'held steady'}."
        )

        events.append(
            {
                "id": uuid.uuid4(),
                "normalized_product_id": product["id"],
                "detected_date": detected_date,
                "old_size": old_size,
                "new_size": new_size,
                "old_unit": unit,
                "new_unit": unit,
                "price_at_old_size": price_at_old_size,
                "price_at_new_size": price_at_new_size,
                "confidence": confidence,
                "notes": notes,
                "created_at": now,
                "updated_at": now,
            }
        )

    return events
|
||||
@@ -0,0 +1,203 @@
|
||||
"""Generate Store and StoreLocation seed data."""
|
||||
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from cartsnitch_common.constants import StoreSlug
|
||||
from cartsnitch_common.seed.config import NUM_LOCATIONS_PER_STORE
|
||||
|
||||
# The fixed set of store chains to seed, in the order they are emitted.
_STORE_DEFS: list[dict] = [
    dict(
        name="Meijer",
        slug=StoreSlug.MEIJER,
        logo_url="https://www.meijer.com/favicon.ico",
        website_url="https://www.meijer.com",
    ),
    dict(
        name="Kroger",
        slug=StoreSlug.KROGER,
        logo_url="https://www.kroger.com/favicon.ico",
        website_url="https://www.kroger.com",
    ),
    dict(
        name="Target",
        slug=StoreSlug.TARGET,
        logo_url="https://www.target.com/favicon.ico",
        website_url="https://www.target.com",
    ),
]
|
||||
|
||||
# Five SE Michigan locations for each store chain (15 in total), keyed by slug.
_LOCATION_DEFS: dict[StoreSlug, list[dict]] = {
    StoreSlug.MEIJER: [
        {
            "address": "3145 Ann Arbor-Saline Rd", "city": "Ann Arbor",
            "state": "MI", "zip": "48103", "lat": 42.2434, "lng": -83.8102,
        },
        {
            "address": "700 W Ellsworth Rd", "city": "Ann Arbor",
            "state": "MI", "zip": "48108", "lat": 42.2318, "lng": -83.7581,
        },
        {
            "address": "5100 Oakman Blvd", "city": "Dearborn",
            "state": "MI", "zip": "48126", "lat": 42.3223, "lng": -83.1952,
        },
        {
            "address": "15555 Northline Rd", "city": "Southgate",
            "state": "MI", "zip": "48195", "lat": 42.2089, "lng": -83.1953,
        },
        {
            "address": "2855 Washtenaw Ave", "city": "Ypsilanti",
            "state": "MI", "zip": "48197", "lat": 42.2461, "lng": -83.6388,
        },
    ],
    StoreSlug.KROGER: [
        {
            "address": "2010 W Stadium Blvd", "city": "Ann Arbor",
            "state": "MI", "zip": "48103", "lat": 42.2706, "lng": -83.7807,
        },
        {
            "address": "1100 S Main St", "city": "Ann Arbor",
            "state": "MI", "zip": "48104", "lat": 42.2555, "lng": -83.7469,
        },
        {
            "address": "23650 Michigan Ave", "city": "Dearborn",
            "state": "MI", "zip": "48124", "lat": 42.3221, "lng": -83.2135,
        },
        {
            "address": "14000 Michigan Ave", "city": "Dearborn",
            "state": "MI", "zip": "48126", "lat": 42.3281, "lng": -83.1789,
        },
        {
            "address": "3965 Packard St", "city": "Ann Arbor",
            "state": "MI", "zip": "48108", "lat": 42.2298, "lng": -83.7196,
        },
    ],
    StoreSlug.TARGET: [
        {
            "address": "3165 Ann Arbor-Saline Rd", "city": "Ann Arbor",
            "state": "MI", "zip": "48103", "lat": 42.2431, "lng": -83.8097,
        },
        {
            "address": "4001 Carpenter Rd", "city": "Ypsilanti",
            "state": "MI", "zip": "48197", "lat": 42.2373, "lng": -83.6617,
        },
        {
            "address": "16000 Ford Rd", "city": "Dearborn",
            "state": "MI", "zip": "48126", "lat": 42.3312, "lng": -83.2098,
        },
        {
            "address": "17300 Eureka Rd", "city": "Southgate",
            "state": "MI", "zip": "48195", "lat": 42.2001, "lng": -83.2014,
        },
        {
            "address": "2400 E Stadium Blvd", "city": "Ann Arbor",
            "state": "MI", "zip": "48104", "lat": 42.2624, "lng": -83.7102,
        },
    ],
}
|
||||
|
||||
|
||||
def generate_stores() -> list[dict]:
    """Return one store record per entry in ``_STORE_DEFS``.

    Primary keys are built from the module-level ``random`` generator instead
    of ``uuid.uuid4()``.  ``uuid4`` reads OS entropy and is unaffected by
    ``random.seed``, which made the IDs differ between runs that used the
    same seed.

    Returns:
        A list of dicts with the keys ``id``, ``name``, ``slug``,
        ``logo_url``, ``website_url``, ``created_at`` and ``updated_at``.
        Both timestamps hold the wall-clock UTC time of this call and are
        shared by every record.
    """
    # Function-scope import keeps this function self-contained.
    import random

    now = datetime.now(tz=UTC)
    return [
        {
            # version=4 stamps the RFC 4122 variant/version bits onto the
            # 128 seeded random bits, so the value is still a valid UUIDv4.
            "id": uuid.UUID(int=random.getrandbits(128), version=4),
            "name": defn["name"],
            "slug": defn["slug"],
            "logo_url": defn["logo_url"],
            "website_url": defn["website_url"],
            "created_at": now,
            "updated_at": now,
        }
        for defn in _STORE_DEFS
    ]
|
||||
|
||||
|
||||
def generate_store_locations(stores: list[dict]) -> list[dict]:
    """Return location records for every store slug in ``_LOCATION_DEFS``.

    At most ``NUM_LOCATIONS_PER_STORE`` locations are emitted per slug, taken
    from the front of that slug's definition list.

    Primary keys are built from the module-level ``random`` generator instead
    of ``uuid.uuid4()``, which ignores ``random.seed`` and therefore produced
    different IDs on every run.

    Args:
        stores: Store records as produced by ``generate_stores``; only the
            ``slug`` and ``id`` keys are read.

    Returns:
        A list of dicts with the keys ``id``, ``store_id``, ``address``,
        ``city``, ``state``, ``zip``, ``lat``, ``lng``, ``created_at`` and
        ``updated_at``.  Timestamps hold the wall-clock UTC time of this call.

    Raises:
        KeyError: If a slug in ``_LOCATION_DEFS`` has no matching entry in
            ``stores``.
    """
    # Function-scope import keeps this function self-contained.
    import random

    now = datetime.now(tz=UTC)
    slug_to_id = {s["slug"]: s["id"] for s in stores}
    locations = []
    for slug, loc_defs in _LOCATION_DEFS.items():
        store_id = slug_to_id[slug]
        for loc in loc_defs[:NUM_LOCATIONS_PER_STORE]:
            locations.append(
                {
                    # Seeded UUIDv4 (see docstring).
                    "id": uuid.UUID(int=random.getrandbits(128), version=4),
                    "store_id": store_id,
                    "address": loc["address"],
                    "city": loc["city"],
                    "state": loc["state"],
                    "zip": loc["zip"],
                    "lat": loc["lat"],
                    "lng": loc["lng"],
                    "created_at": now,
                    "updated_at": now,
                }
            )
    return locations
|
||||
@@ -0,0 +1,105 @@
|
||||
"""Generate User and UserStoreAccount seed data."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, datetime, timedelta
|
||||
|
||||
from faker import Faker
|
||||
|
||||
from cartsnitch_common.constants import AccountStatus
|
||||
from cartsnitch_common.seed.config import (
|
||||
NUM_ACTIVE_USERS,
|
||||
NUM_USER_STORE_ACCOUNTS,
|
||||
NUM_USERS,
|
||||
SEED_END_DATE,
|
||||
)
|
||||
|
||||
|
||||
def generate_users(fake: Faker) -> list[dict]:
    """Return ``NUM_USERS`` user records; the first ``NUM_ACTIVE_USERS`` are active.

    Primary keys are built from the module-level ``random`` generator instead
    of ``uuid.uuid4()``, which ignores ``random.seed`` and therefore produced
    different IDs on every run.

    Args:
        fake: Faker instance used for e-mail addresses, the placeholder
            password digest and display names.

    Returns:
        A list of dicts with the keys ``id``, ``email``, ``hashed_password``,
        ``display_name`` (``None`` for roughly one user in five),
        ``created_at``, ``updated_at`` and the private flag ``_active``.
        ``created_at`` and ``updated_at`` are equal and lie 30-365 days
        before the wall-clock time of this call.
    """
    now = datetime.now(tz=UTC)
    users = []
    for i in range(NUM_USERS):
        created_at = now - timedelta(days=random.randint(30, 365))
        users.append(
            {
                # Seeded UUIDv4 (see docstring).
                "id": uuid.UUID(int=random.getrandbits(128), version=4),
                "email": fake.unique.email(),
                # Placeholder digest from Faker; not derived from a password.
                "hashed_password": fake.sha256(),
                "display_name": fake.name() if random.random() > 0.2 else None,
                "created_at": created_at,
                "updated_at": created_at,
                # Underscore-prefixed: bookkeeping for other generators,
                # not a column value.
                "_active": i < NUM_ACTIVE_USERS,
            }
        )
    return users
|
||||
|
||||
|
||||
def generate_user_store_accounts(
    users: list[dict],
    stores: list[dict],
) -> list[dict]:
    """Return roughly ``NUM_USER_STORE_ACCOUNTS`` user-store account links.

    Every active user (``_active`` is truthy) is linked to 1-3 distinct
    stores with status ``ACTIVE``, fake session data, a session expiry 1-90
    days after the wall-clock time of this call, and a ``last_sync_at`` 0-14
    days before ``SEED_END_DATE``.

    If that leaves fewer than ``NUM_USER_STORE_ACCOUNTS`` accounts, the
    remaining slots are filled with one account each for randomly sampled
    inactive users; those accounts carry no session data and a status drawn
    from ``EXPIRED``, ``ERROR`` and ``ACTIVE``.  When the active users alone
    already exceed the target, no inactive accounts are added and the result
    is larger than ``NUM_USER_STORE_ACCOUNTS``.

    Primary keys are built from the module-level ``random`` generator instead
    of ``uuid.uuid4()``, which ignores ``random.seed``.

    Args:
        users: User records; the keys ``id``, ``created_at``, ``updated_at``
            and ``_active`` are read.
        stores: Store records; only ``id`` is read.

    Returns:
        A list of account dicts.  Empty when ``stores`` is empty.
    """
    # Without stores there is nothing to link to (and random.choice would
    # raise IndexError on the empty list below).
    if not stores:
        return []

    now = datetime.now(tz=UTC)
    # Midnight UTC on SEED_END_DATE; last_sync_at is offset backwards from it.
    sync_anchor = datetime(
        SEED_END_DATE.year,
        SEED_END_DATE.month,
        SEED_END_DATE.day,
        tzinfo=UTC,
    )
    accounts: list[dict] = []
    seen: set[tuple] = set()

    active_users = [u for u in users if u["_active"]]
    inactive_users = [u for u in users if not u["_active"]]

    # Active users: each gets 1-3 store accounts
    for user in active_users:
        num_accounts = random.randint(1, 3)
        selected_stores = random.sample(stores, min(num_accounts, len(stores)))
        for store in selected_stores:
            key = (user["id"], store["id"])
            if key in seen:
                continue
            seen.add(key)
            last_sync = sync_anchor - timedelta(days=random.randint(0, 14))
            accounts.append(
                {
                    "id": uuid.UUID(int=random.getrandbits(128), version=4),
                    "user_id": user["id"],
                    "store_id": store["id"],
                    "session_data": {"token": "SEED_FAKE_TOKEN", "expires": "2026-12-31"},
                    "session_expires_at": now + timedelta(days=random.randint(1, 90)),
                    "last_sync_at": last_sync,
                    "status": AccountStatus.ACTIVE,
                    "created_at": user["created_at"],
                    "updated_at": user["updated_at"],
                }
            )

    # Fill remaining slots from inactive users.  Clamp at zero: a negative
    # sample size makes random.sample raise ValueError.
    remaining = max(0, NUM_USER_STORE_ACCOUNTS - len(accounts))
    for user in random.sample(inactive_users, min(remaining, len(inactive_users))):
        store = random.choice(stores)
        key = (user["id"], store["id"])
        if key in seen:
            continue
        seen.add(key)
        status = random.choice([AccountStatus.EXPIRED, AccountStatus.ERROR, AccountStatus.ACTIVE])
        accounts.append(
            {
                "id": uuid.UUID(int=random.getrandbits(128), version=4),
                "user_id": user["id"],
                "store_id": store["id"],
                "session_data": None,
                "session_expires_at": None,
                "last_sync_at": None,
                "status": status,
                "created_at": user["created_at"],
                "updated_at": user["updated_at"],
            }
        )

    return accounts
|
||||
@@ -0,0 +1,189 @@
|
||||
"""Seed runner: orchestrates generation and DB insertion in FK-safe order."""
|
||||
|
||||
import random
|
||||
import time
|
||||
from typing import Any
|
||||
|
||||
from faker import Faker
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from cartsnitch_common.database import get_sync_session_factory
|
||||
from cartsnitch_common.models.coupon import Coupon
|
||||
from cartsnitch_common.models.price import PriceHistory
|
||||
from cartsnitch_common.models.product import NormalizedProduct
|
||||
from cartsnitch_common.models.purchase import Purchase, PurchaseItem
|
||||
from cartsnitch_common.models.shrinkflation import ShrinkflationEvent
|
||||
from cartsnitch_common.models.store import Store, StoreLocation
|
||||
from cartsnitch_common.models.user import User, UserStoreAccount
|
||||
from cartsnitch_common.seed.config import SEED_VALUE
|
||||
from cartsnitch_common.seed.generators.coupons import generate_coupons
|
||||
from cartsnitch_common.seed.generators.prices import generate_price_history
|
||||
from cartsnitch_common.seed.generators.products import generate_products
|
||||
from cartsnitch_common.seed.generators.purchases import generate_purchase_items, generate_purchases
|
||||
from cartsnitch_common.seed.generators.shrinkflation import generate_shrinkflation_events
|
||||
from cartsnitch_common.seed.generators.stores import generate_store_locations, generate_stores
|
||||
from cartsnitch_common.seed.generators.users import generate_user_store_accounts, generate_users
|
||||
|
||||
# FK-safe truncation order: essentially the reverse of the insertion order
# used by run_seed.  It is not a strict reversal ("user_store_accounts" and
# "normalized_products" are swapped relative to one); run_seed truncates each
# table with CASCADE.
_TRUNCATE_TABLES: list[str] = [
    "shrinkflation_events",
    "coupons",
    "price_history",
    "purchase_items",
    "purchases",
    "user_store_accounts",
    "normalized_products",
    "users",
    "store_locations",
    "stores",
]
|
||||
|
||||
|
||||
def _log(msg: str) -> None:
    """Print *msg* on its own line to stdout, flushing the stream right away."""
    print(msg, end="\n", flush=True)
|
||||
|
||||
|
||||
def _bulk_insert(session: Session, model: type, rows: list[dict[str, Any]]) -> None:
    """Insert *rows* into the table mapped by *model* with one Core INSERT.

    Keys whose name starts with an underscore are treated as generator-internal
    bookkeeping and are dropped from each row before it is sent to the
    database.  Nothing is executed when *rows* is empty.

    Args:
        session: Open session the statement is executed on; the caller commits.
        model: Mapped class exposing a ``__table__`` attribute.
        rows: One dict of column values per record.
    """
    if rows:
        payload = []
        for record in rows:
            # Keep only real column values; "_"-prefixed keys are private.
            payload.append({col: val for col, val in record.items() if col[:1] != "_"})
        session.execute(model.__table__.insert(), payload)  # type: ignore[attr-defined]
|
||||
|
||||
|
||||
def run_seed(
    database_url: str | None = None,
    seed_value: int = SEED_VALUE,
    dry_run: bool = False,
) -> None:
    """Build the full seed data set and, unless *dry_run*, load it into the DB.

    The function seeds ``random`` and Faker, runs every generator in
    dependency order while logging cumulative timings, prints a per-table
    summary, and then truncates the tables listed in ``_TRUNCATE_TABLES``,
    inserts the generated rows and commits, all on a single session.

    Args:
        database_url: Optional override for the DB connection URL; passed
            through to ``get_sync_session_factory``.
        seed_value: Seed handed to both ``random.seed`` and ``Faker.seed``.
        dry_run: When true, stop after the summary without opening a session.
    """
    random.seed(seed_value)
    fake = Faker()
    Faker.seed(seed_value)

    _log("=== CartSnitch Seed Data Generator ===")
    _log(f"Seed: {seed_value}")

    # --- Generation phase ---
    started = time.monotonic()

    def report(rows: list, noun: str) -> None:
        # Elapsed time is measured from the start of generation, not per step.
        _log(f" {len(rows)} {noun} ({time.monotonic() - started:.2f}s)")

    _log("Generating stores...")
    stores = generate_stores()
    report(stores, "stores")

    _log("Generating store locations...")
    store_locations = generate_store_locations(stores)
    report(store_locations, "store locations")

    _log("Generating users...")
    users = generate_users(fake)
    report(users, "users")

    _log("Generating user store accounts...")
    user_store_accounts = generate_user_store_accounts(users, stores)
    report(user_store_accounts, "user store accounts")

    _log("Generating products...")
    products = generate_products(fake)
    report(products, "products")

    _log("Generating purchases...")
    purchases = generate_purchases(users, stores, store_locations)
    report(purchases, "purchases")

    _log("Generating purchase items...")
    purchase_items = generate_purchase_items(purchases, products)
    report(purchase_items, "purchase items")

    _log("Generating price history...")
    price_history = generate_price_history(products, stores, purchase_items)
    report(price_history, "price history records")

    _log("Generating coupons...")
    coupons = generate_coupons(fake, products, stores)
    report(coupons, "coupons")

    _log("Generating shrinkflation events...")
    shrinkflation_events = generate_shrinkflation_events(products)
    report(shrinkflation_events, "shrinkflation events")

    # (table name, human label, model, rows), listed in insertion order.
    plan = [
        ("stores", "stores", Store, stores),
        ("store_locations", "store locations", StoreLocation, store_locations),
        ("users", "users", User, users),
        ("user_store_accounts", "user store accounts", UserStoreAccount, user_store_accounts),
        ("normalized_products", "products", NormalizedProduct, products),
        ("purchases", "purchases", Purchase, purchases),
        ("purchase_items", "purchase items", PurchaseItem, purchase_items),
        ("price_history", "price history", PriceHistory, price_history),
        ("coupons", "coupons", Coupon, coupons),
        ("shrinkflation_events", "shrinkflation events", ShrinkflationEvent, shrinkflation_events),
    ]

    _log("")
    _log("=== Summary ===")
    for table_name, _label, _model, rows in plan:
        _log(f" {table_name}: {len(rows)}")

    if dry_run:
        _log("")
        _log("Dry run — no data written.")
        return

    # --- DB insertion phase ---
    session_factory = get_sync_session_factory(database_url)
    with session_factory() as session:
        _log("")
        _log("Truncating tables (reverse FK order)...")
        for table in _TRUNCATE_TABLES:
            session.execute(text(f"TRUNCATE TABLE {table} CASCADE"))
        _log(" done")

        for _table_name, label, model, rows in plan:
            _log(f"Inserting {label}...")
            _bulk_insert(session, model, rows)
            _log(f" {len(rows)} inserted")

        # Single commit: truncation and all inserts share one transaction.
        session.commit()

    elapsed = time.monotonic() - started
    _log("")
    _log(f"Seed complete in {elapsed:.1f}s")
|
||||
Reference in New Issue
Block a user