forked from cartsnitch/app
04fd86cf8d
git-subtree-dir: common git-subtree-split: 28b2939037b5932ca5d5a6c734b292c012ac675f
122 lines
4.9 KiB
SQL
122 lines
4.9 KiB
SQL
-- =============================================================================
-- Stat 2: Annual savings potential from cross-store price comparison
-- Validates: "$336/year potential savings from buying the same items
-- at the cheapest store" (launch announcement)
--
-- Methodology:
-- 1. For each (normalized_product_id, store_id), take the MOST RECENT
-- regular_price within the past 90 days ("current" price).
-- 2. Keep only products observed at 2+ distinct stores.
-- 3. For each product: savings_per_purchase = avg_price - min_price across stores.
-- 4. Annualise: multiply by an assumed purchase frequency of 26x/year
-- (~every 2 weeks for regularly purchased grocery items).
-- 5. Sum across all eligible products to get total annual savings potential.
--
-- Sensitivity:
-- Change the frequency constant (26) and lookback interval (90 days) to
-- explore how sensitive the $336 figure is to these assumptions.
--
-- Run against production Postgres once infrastructure is available.
-- =============================================================================
|
|
|
|
-- Summary query: total annual savings potential across all products that are
-- priced at two or more stores. Returns a single row.
--
-- Step 1: most-recent price per (product, store) within the past 90 days
WITH latest_prices AS (
    -- DISTINCT ON keeps the first row of each (product, store) group under the
    -- ORDER BY below, i.e. the newest observation inside the lookback window.
    -- NOTE(review): if several rows can share the same observed_date for one
    -- (product, store), the row kept is arbitrary -- consider adding a
    -- tie-breaker column to the ORDER BY.
    SELECT DISTINCT ON (ph.normalized_product_id, ph.store_id)
        ph.normalized_product_id,
        ph.store_id,
        s.slug AS store_slug,
        ph.regular_price AS current_price,
        ph.observed_date
    FROM price_history AS ph
    INNER JOIN stores AS s ON s.id = ph.store_id
    WHERE ph.observed_date >= CURRENT_DATE - INTERVAL '90 days'
      AND ph.regular_price > 0          -- ignore zero/negative prices
    ORDER BY
        ph.normalized_product_id,
        ph.store_id,
        ph.observed_date DESC
),

-- Step 2: aggregate per product — only keep products seen at 2+ stores
product_price_spread AS (
    SELECT
        lp.normalized_product_id,
        COUNT(DISTINCT lp.store_id) AS store_count,
        MIN(lp.current_price) AS cheapest_price,
        AVG(lp.current_price) AS avg_price,
        MAX(lp.current_price) AS most_expensive_price,
        MAX(lp.current_price) - MIN(lp.current_price) AS price_range
    FROM latest_prices AS lp
    GROUP BY lp.normalized_product_id
    HAVING COUNT(DISTINCT lp.store_id) >= 2
),

-- Step 3: compute savings_per_purchase (average price minus cheapest price).
-- The value is deliberately left UNROUNDED here: rounding each product before
-- summing accumulates rounding error and makes the frequency scenarios in the
-- final SELECT disagree with each other. Rounding happens once, at the end.
savings_per_product AS (
    SELECT
        pps.normalized_product_id,
        np.canonical_name,
        np.category,
        pps.store_count,
        pps.cheapest_price,
        pps.avg_price,
        pps.price_range,
        pps.avg_price - pps.cheapest_price AS savings_per_purchase
    FROM product_price_spread AS pps
    INNER JOIN normalized_products AS np ON np.id = pps.normalized_product_id
)

-- Final summary: total annual savings potential.
-- Purchase frequency assumption: 26 purchases/year per product (~every 2 weeks);
-- the 20x and 52x columns are sensitivity scenarios computed from the same
-- unrounded per-purchase sum, so the three totals are directly comparable.
-- COALESCE turns the NULL that SUM yields over zero eligible products into 0.
SELECT
    COUNT(*) AS eligible_product_count,
    ROUND(AVG(savings_per_purchase), 4) AS avg_savings_per_purchase,
    ROUND(COALESCE(SUM(savings_per_purchase), 0) * 26, 2) AS total_annual_savings_26x_freq,
    -- Sensitivity: alternative frequencies
    ROUND(COALESCE(SUM(savings_per_purchase), 0) * 20, 2) AS total_annual_savings_20x_freq,
    ROUND(COALESCE(SUM(savings_per_purchase), 0) * 52, 2) AS total_annual_savings_52x_freq
FROM savings_per_product;
|
|
|
|
|
|
-- Per-product detail (top 50 by annual savings opportunity)
-- Standalone statement: CTEs are scoped to a single statement, so the
-- latest-price and price-spread steps are defined again here.
WITH latest_prices AS (
    -- Newest observation per (product, store) within the past 90 days;
    -- DISTINCT ON keeps the first row of each group under the ORDER BY below.
    SELECT DISTINCT ON (ph.normalized_product_id, ph.store_id)
        ph.normalized_product_id,
        ph.store_id,
        s.slug AS store_slug,
        ph.regular_price AS current_price,
        ph.observed_date
    FROM price_history AS ph
    INNER JOIN stores AS s ON s.id = ph.store_id
    WHERE ph.observed_date >= CURRENT_DATE - INTERVAL '90 days'
      AND ph.regular_price > 0          -- ignore zero/negative prices
    ORDER BY ph.normalized_product_id, ph.store_id, ph.observed_date DESC
),

-- One row per product priced at 2+ stores, with its cheapest and average price
product_price_spread AS (
    SELECT
        lp.normalized_product_id,
        COUNT(DISTINCT lp.store_id) AS store_count,
        MIN(lp.current_price) AS cheapest_price,
        AVG(lp.current_price) AS avg_price
    FROM latest_prices AS lp
    GROUP BY lp.normalized_product_id
    HAVING COUNT(DISTINCT lp.store_id) >= 2
)

-- savings_per_purchase = average price minus cheapest price;
-- annual figure assumes 26 purchases/year (~every 2 weeks).
SELECT
    np.canonical_name,
    np.category,
    np.brand,
    np.size,
    np.size_unit,
    pps.store_count,
    pps.cheapest_price,
    ROUND(pps.avg_price, 2) AS avg_price,
    ROUND(pps.avg_price - pps.cheapest_price, 2) AS savings_per_purchase,
    ROUND((pps.avg_price - pps.cheapest_price) * 26, 2) AS annual_savings_at_26x
FROM product_price_spread AS pps
INNER JOIN normalized_products AS np ON np.id = pps.normalized_product_id
-- Secondary key makes the top-50 cut deterministic when rounded savings tie.
ORDER BY
    annual_savings_at_26x DESC,
    pps.normalized_product_id
LIMIT 50;
|