Merge commit '4cf6f91e954b770198578bcb8db5d98ac964bfed' as 'common'
This commit is contained in:
@@ -0,0 +1,156 @@
|
||||
"""Generate Purchase and PurchaseItem seed data."""
|
||||
|
||||
import random
|
||||
import uuid
|
||||
from datetime import UTC, date, datetime, timedelta
|
||||
from decimal import Decimal
|
||||
|
||||
from cartsnitch_common.seed.config import (
|
||||
NUM_PURCHASE_ITEMS,
|
||||
NUM_PURCHASES,
|
||||
SEED_END_DATE,
|
||||
SEED_START_DATE,
|
||||
)
|
||||
|
||||
_DATE_RANGE_DAYS = (SEED_END_DATE - SEED_START_DATE).days
|
||||
|
||||
|
||||
def _random_date() -> date:
|
||||
return SEED_START_DATE + timedelta(days=random.randint(0, _DATE_RANGE_DAYS))
|
||||
|
||||
|
||||
def _decimal(val: float, places: int = 2) -> Decimal:
|
||||
return Decimal(str(round(val, places)))
|
||||
|
||||
|
||||
def generate_purchases(
|
||||
users: list[dict],
|
||||
stores: list[dict],
|
||||
store_locations: list[dict],
|
||||
) -> list[dict]:
|
||||
"""Return NUM_PURCHASES purchase records."""
|
||||
now = datetime.now(tz=UTC)
|
||||
active_users = [u for u in users if u["_active"]]
|
||||
inactive_users = [u for u in users if not u["_active"]]
|
||||
|
||||
# Build location index by store_id
|
||||
locs_by_store: dict = {}
|
||||
for loc in store_locations:
|
||||
locs_by_store.setdefault(loc["store_id"], []).append(loc)
|
||||
|
||||
purchases = []
|
||||
seen_receipts: set[tuple] = set()
|
||||
|
||||
# Active users get 80% of purchases
|
||||
active_count = int(NUM_PURCHASES * 0.8)
|
||||
inactive_count = NUM_PURCHASES - active_count
|
||||
|
||||
def make_purchase(user: dict, store: dict) -> dict | None:
|
||||
receipt_id = f"RCT-{random.randint(100000, 999999)}"
|
||||
key = (user["id"], store["id"], receipt_id)
|
||||
if key in seen_receipts:
|
||||
return None
|
||||
seen_receipts.add(key)
|
||||
subtotal = _decimal(random.uniform(5.0, 150.0))
|
||||
tax = _decimal(float(subtotal) * 0.06)
|
||||
savings = _decimal(random.uniform(0.0, float(subtotal) * 0.3))
|
||||
total = _decimal(float(subtotal) + float(tax) - float(savings))
|
||||
purchase_date = _random_date()
|
||||
store_locs = locs_by_store.get(store["id"], [])
|
||||
store_location_id = random.choice(store_locs)["id"] if store_locs else None
|
||||
ingested_at = datetime(
|
||||
purchase_date.year, purchase_date.month, purchase_date.day, tzinfo=UTC
|
||||
) + timedelta(hours=random.randint(1, 48))
|
||||
return {
|
||||
"id": uuid.uuid4(),
|
||||
"user_id": user["id"],
|
||||
"store_id": store["id"],
|
||||
"store_location_id": store_location_id,
|
||||
"receipt_id": receipt_id,
|
||||
"purchase_date": purchase_date,
|
||||
"total": total,
|
||||
"subtotal": subtotal,
|
||||
"tax": tax,
|
||||
"savings_total": savings if float(savings) > 0 else None,
|
||||
"source_url": None,
|
||||
"raw_data": None,
|
||||
"ingested_at": ingested_at,
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
}
|
||||
|
||||
for _ in range(active_count):
|
||||
user = random.choice(active_users)
|
||||
store = random.choice(stores)
|
||||
p = make_purchase(user, store)
|
||||
if p:
|
||||
purchases.append(p)
|
||||
|
||||
for _ in range(inactive_count):
|
||||
user = random.choice(inactive_users)
|
||||
store = random.choice(stores)
|
||||
p = make_purchase(user, store)
|
||||
if p:
|
||||
purchases.append(p)
|
||||
|
||||
return purchases[:NUM_PURCHASES]
|
||||
|
||||
|
||||
def generate_purchase_items(
|
||||
purchases: list[dict],
|
||||
products: list[dict],
|
||||
) -> list[dict]:
|
||||
"""Return ~NUM_PURCHASE_ITEMS purchase item records distributed across purchases."""
|
||||
now = datetime.now(tz=UTC)
|
||||
items: list[dict] = []
|
||||
total_target = NUM_PURCHASE_ITEMS
|
||||
num_purchases = len(purchases)
|
||||
|
||||
# Distribute items: avg 5 per purchase with variance
|
||||
for i, purchase in enumerate(purchases):
|
||||
# Remaining purchases get proportional share
|
||||
remaining_purchases = num_purchases - i
|
||||
remaining_items = total_target - len(items)
|
||||
if remaining_purchases <= 0 or remaining_items <= 0:
|
||||
break
|
||||
avg = remaining_items / remaining_purchases
|
||||
count = max(1, min(15, int(random.gauss(avg, 2))))
|
||||
count = min(count, remaining_items)
|
||||
|
||||
for _ in range(count):
|
||||
product = random.choice(products)
|
||||
unit_price = _decimal(random.uniform(0.99, 25.99))
|
||||
quantity = Decimal("1.000")
|
||||
extended_price = _decimal(float(unit_price) * float(quantity))
|
||||
has_sale = random.random() > 0.7
|
||||
sale_price = (
|
||||
_decimal(float(unit_price) * random.uniform(0.7, 0.95)) if has_sale else None
|
||||
)
|
||||
has_coupon = random.random() > 0.85
|
||||
coupon_discount = _decimal(random.uniform(0.25, 2.00)) if has_coupon else None
|
||||
|
||||
upc = None
|
||||
if product["upc_variants"]:
|
||||
upc = random.choice(product["upc_variants"])
|
||||
|
||||
items.append(
|
||||
{
|
||||
"id": uuid.uuid4(),
|
||||
"purchase_id": purchase["id"],
|
||||
"product_name_raw": product["canonical_name"],
|
||||
"upc": upc,
|
||||
"quantity": quantity,
|
||||
"unit_price": unit_price,
|
||||
"extended_price": extended_price,
|
||||
"regular_price": unit_price,
|
||||
"sale_price": sale_price,
|
||||
"coupon_discount": coupon_discount,
|
||||
"loyalty_discount": None,
|
||||
"category_raw": product["category"].value if product["category"] else None,
|
||||
"normalized_product_id": product["id"],
|
||||
"created_at": now,
|
||||
"updated_at": now,
|
||||
}
|
||||
)
|
||||
|
||||
return items
|
||||
Reference in New Issue
Block a user