forked from cartsnitch/cartsnitch
358 lines
11 KiB
Python
358 lines
11 KiB
Python
"""Tests for the seed data generator."""
|
|
|
|
import random
|
|
|
|
from faker import Faker
|
|
|
|
from cartsnitch_common.seed.config import (
|
|
NUM_ACTIVE_USERS,
|
|
NUM_COUPONS,
|
|
NUM_PRICE_HISTORY,
|
|
NUM_PRODUCTS,
|
|
NUM_PURCHASE_ITEMS,
|
|
NUM_PURCHASES,
|
|
NUM_SHRINKFLATION_EVENTS,
|
|
NUM_STORES,
|
|
NUM_USERS,
|
|
SEED_END_DATE,
|
|
SEED_START_DATE,
|
|
SEED_VALUE,
|
|
)
|
|
from cartsnitch_common.seed.generators.coupons import generate_coupons
|
|
from cartsnitch_common.seed.generators.prices import generate_price_history
|
|
from cartsnitch_common.seed.generators.products import generate_products
|
|
from cartsnitch_common.seed.generators.purchases import generate_purchase_items, generate_purchases
|
|
from cartsnitch_common.seed.generators.shrinkflation import generate_shrinkflation_events
|
|
from cartsnitch_common.seed.generators.stores import generate_store_locations, generate_stores
|
|
from cartsnitch_common.seed.generators.users import generate_users
|
|
|
|
|
|
def _seed() -> None:
    """Re-seed the stdlib ``random`` module and Faker with ``SEED_VALUE``."""
    # Order is kept: ``random`` first, then Faker.
    for reseed in (random.seed, Faker.seed):
        reseed(SEED_VALUE)
|
|
|
|
|
|
def _make_fake() -> Faker:
    """Build and return a new ``Faker`` instance with default arguments."""
    faker_instance = Faker()
    return faker_instance
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Stores
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_stores_count() -> None:
    """``generate_stores()`` returns exactly ``NUM_STORES`` entries."""
    assert len(generate_stores()) == NUM_STORES
|
|
|
|
|
|
def test_generate_stores_deterministic() -> None:
    """Two consecutive ``generate_stores()`` calls expose the same slug set."""
    first_run, second_run = generate_stores(), generate_stores()
    # Stores are fixed (no RNG), so slugs are stable
    first_slugs, second_slugs = (
        {store["slug"] for store in run} for run in (first_run, second_run)
    )
    assert first_slugs == second_slugs
|
|
|
|
|
|
def test_generate_store_locations_count() -> None:
    """Location generation over the generated stores yields 15 rows."""
    locations = generate_store_locations(generate_stores())
    expected_total = 15  # 3 stores * 5 locations
    assert len(locations) == expected_total
|
|
|
|
|
|
def test_generate_store_locations_fk() -> None:
    """Every location's ``store_id`` is the ``id`` of a generated store."""
    chains = generate_stores()
    locations = generate_store_locations(chains)
    known_store_ids = {chain["id"] for chain in chains}
    # Subset check: no location may reference an id outside the store set.
    referenced_ids = {location["store_id"] for location in locations}
    assert referenced_ids <= known_store_ids
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Users
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_users_count() -> None:
    """A seeded ``generate_users`` run returns ``NUM_USERS`` users."""
    _seed()
    generated = generate_users(_make_fake())
    assert len(generated) == NUM_USERS
|
|
|
|
|
|
def test_generate_users_active_count() -> None:
    """The number of users with a truthy ``_active`` flag is ``NUM_ACTIVE_USERS``."""
    _seed()
    generated = generate_users(_make_fake())
    active_total = sum(1 for user in generated if user["_active"])
    assert active_total == NUM_ACTIVE_USERS
|
|
|
|
|
|
def test_generate_users_deterministic() -> None:
    """Re-seeding before each run reproduces the same ordered list of emails."""
    email_runs = []
    for _ in range(2):
        _seed()
        faker_instance = _make_fake()
        generated = generate_users(faker_instance)
        email_runs.append([user["email"] for user in generated])

    # Emails should match (same seed → same Faker output)
    first_emails, second_emails = email_runs
    assert first_emails == second_emails
|
|
|
|
|
|
def test_generate_users_unique_emails() -> None:
    """No email address appears more than once among the generated users."""
    _seed()
    generated = generate_users(_make_fake())
    all_emails = [user["email"] for user in generated]
    distinct_emails = set(all_emails)
    assert len(all_emails) == len(distinct_emails)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Products
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_products_count() -> None:
    """A seeded ``generate_products`` run returns ``NUM_PRODUCTS`` products."""
    _seed()
    catalog = generate_products(_make_fake())
    assert len(catalog) == NUM_PRODUCTS
|
|
|
|
|
|
def test_generate_products_deterministic() -> None:
    """Re-seeding before each run reproduces the same ordered canonical names."""

    def seeded_names() -> list:
        # Each call re-seeds, builds a fresh Faker, and regenerates the catalog.
        _seed()
        faker_instance = _make_fake()
        return [item["canonical_name"] for item in generate_products(faker_instance)]

    first_names = seeded_names()
    second_names = seeded_names()
    assert first_names == second_names
|
|
|
|
|
|
def test_generate_products_have_categories() -> None:
    """No generated product has ``category`` set to ``None``."""
    _seed()
    catalog = generate_products(_make_fake())
    assert all(item["category"] is not None for item in catalog)
|
|
|
|
|
|
def test_generate_products_have_upc_variants() -> None:
    """Each product carries a non-empty list under ``upc_variants``."""
    _seed()
    catalog = generate_products(_make_fake())
    for item in catalog:
        variants = item["upc_variants"]
        assert variants
        assert isinstance(variants, list)
        assert len(variants) >= 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Purchases
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_purchases_count() -> None:
    """A seeded pipeline run produces exactly ``NUM_PURCHASES`` purchases."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)
    assert len(receipts) == NUM_PURCHASES
|
|
|
|
|
|
def test_generate_purchases_fk() -> None:
    """Each purchase references a generated user and a generated store."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)

    known_user_ids = {shopper["id"] for shopper in shoppers}
    known_store_ids = {chain["id"] for chain in chains}
    assert all(
        receipt["user_id"] in known_user_ids and receipt["store_id"] in known_store_ids
        for receipt in receipts
    )
|
|
|
|
|
|
def test_generate_purchase_items_count() -> None:
    """The purchase-item count deviates from ``NUM_PURCHASE_ITEMS`` by under 20%."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)
    catalog = generate_products(faker_instance)
    line_items = generate_purchase_items(receipts, catalog)

    # Should be close to target (within 20%)
    allowed_deviation = NUM_PURCHASE_ITEMS * 0.20
    actual_deviation = abs(len(line_items) - NUM_PURCHASE_ITEMS)
    assert actual_deviation < allowed_deviation
|
|
|
|
|
|
def test_generate_purchase_items_fk() -> None:
    """Each purchase item references a generated purchase and a generated product."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)
    catalog = generate_products(faker_instance)
    line_items = generate_purchase_items(receipts, catalog)

    known_purchase_ids = {receipt["id"] for receipt in receipts}
    known_product_ids = {entry["id"] for entry in catalog}
    assert all(
        line["purchase_id"] in known_purchase_ids
        and line["normalized_product_id"] in known_product_ids
        for line in line_items
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Price History
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_price_history_count() -> None:
    """The price-history count deviates from ``NUM_PRICE_HISTORY`` by under 10%."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)
    catalog = generate_products(faker_instance)
    line_items = generate_purchase_items(receipts, catalog)
    observations = generate_price_history(catalog, chains, line_items)

    # Should be within 10% of target
    allowed_deviation = NUM_PRICE_HISTORY * 0.10
    actual_deviation = abs(len(observations) - NUM_PRICE_HISTORY)
    assert actual_deviation < allowed_deviation
|
|
|
|
|
|
def test_generate_price_history_fk() -> None:
    """Each price row references a known product and store and has a positive price."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)
    catalog = generate_products(faker_instance)
    line_items = generate_purchase_items(receipts, catalog)
    observations = generate_price_history(catalog, chains, line_items)

    known_product_ids = {entry["id"] for entry in catalog}
    known_store_ids = {chain["id"] for chain in chains}
    for observation in observations:
        product_ref = observation["normalized_product_id"]
        assert product_ref in known_product_ids
        store_ref = observation["store_id"]
        assert store_ref in known_store_ids
        assert observation["regular_price"] > 0
|
|
|
|
|
|
def test_price_history_dates_in_range() -> None:
    """Every ``observed_date`` lies in ``[SEED_START_DATE, SEED_END_DATE]``."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    locations = generate_store_locations(chains)
    shoppers = generate_users(faker_instance)
    receipts = generate_purchases(shoppers, chains, locations)
    catalog = generate_products(faker_instance)
    line_items = generate_purchase_items(receipts, catalog)
    observations = generate_price_history(catalog, chains, line_items)

    # Both bounds are inclusive.
    assert all(
        SEED_START_DATE <= observation["observed_date"] <= SEED_END_DATE
        for observation in observations
    )
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Coupons
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_coupons_count() -> None:
    """A seeded ``generate_coupons`` run returns ``NUM_COUPONS`` coupons."""
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    catalog = generate_products(faker_instance)
    offers = generate_coupons(faker_instance, catalog, chains)
    assert len(offers) == NUM_COUPONS
|
|
|
|
|
|
def test_generate_coupons_mix() -> None:
    """Check the expired/active coupon split (~60% expired, ~40% active).

    A coupon counts as expired when ``valid_to`` is before ``SEED_END_DATE``
    and as active otherwise. Ratios are taken against ``NUM_COUPONS``.
    """
    _seed()
    faker_instance = _make_fake()
    chains = generate_stores()
    catalog = generate_products(faker_instance)
    offers = generate_coupons(faker_instance, catalog, chains)

    expired_total = sum(1 for offer in offers if offer["valid_to"] < SEED_END_DATE)
    active_total = sum(1 for offer in offers if offer["valid_to"] >= SEED_END_DATE)

    # Allow ±15% variance from target
    expired_ratio = expired_total / NUM_COUPONS
    assert expired_ratio > 0.45
    active_ratio = active_total / NUM_COUPONS
    assert active_ratio > 0.25
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Shrinkflation
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_generate_shrinkflation_count() -> None:
    """A seeded run produces exactly ``NUM_SHRINKFLATION_EVENTS`` events."""
    _seed()
    catalog = generate_products(_make_fake())
    detected = generate_shrinkflation_events(catalog)
    assert len(detected) == NUM_SHRINKFLATION_EVENTS
|
|
|
|
|
|
def test_generate_shrinkflation_fk() -> None:
    """Each event's ``normalized_product_id`` is the ``id`` of a generated product."""
    _seed()
    catalog = generate_products(_make_fake())
    detected = generate_shrinkflation_events(catalog)
    known_product_ids = {entry["id"] for entry in catalog}
    # Subset check: no event may reference an id outside the product set.
    referenced_ids = {occurrence["normalized_product_id"] for occurrence in detected}
    assert referenced_ids <= known_product_ids
|
|
|
|
|
|
def test_generate_shrinkflation_price_held_or_increased() -> None:
    """Each event shrinks the size and keeps the price at >= 95% of the old one.

    The price check is skipped when either price field is falsy.
    """
    _seed()
    catalog = generate_products(_make_fake())
    for occurrence in generate_shrinkflation_events(catalog):
        old_size, new_size = (
            float(occurrence["old_size"]),
            float(occurrence["new_size"]),
        )
        assert new_size < old_size, f"Expected size reduction: {old_size} -> {new_size}"

        if not (occurrence["price_at_old_size"] and occurrence["price_at_new_size"]):
            continue
        # Price should be maintained or increased (not significantly dropped)
        price_floor = float(occurrence["price_at_old_size"]) * 0.95
        assert float(occurrence["price_at_new_size"]) >= price_floor
|
|
|
|
|
|
def test_generate_shrinkflation_confidence_range() -> None:
    """Each event's ``confidence``, as a float, lies in the closed range 0 to 1.0."""
    _seed()
    catalog = generate_products(_make_fake())
    detected = generate_shrinkflation_events(catalog)
    assert all(0 <= float(occurrence["confidence"]) <= 1.0 for occurrence in detected)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Dry-run smoke test
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_dry_run_does_not_raise() -> None:
    """Smoke test: ``run_seed`` in dry-run mode with ``SEED_VALUE`` must not raise."""
    # Imported inside the test, as in the original, so the runner module is
    # only loaded when this test executes.
    from cartsnitch_common.seed.runner import run_seed

    seed_options = {"dry_run": True, "seed_value": SEED_VALUE}
    run_seed(**seed_options)
|