-- File: cartsnitch-fork-test/common/scripts/stats/savings_potential.sql

-- =============================================================================
-- Stat 2: Annual savings potential from cross-store price comparison
-- Validates: "$336/year potential savings from buying the same items
-- at the cheapest store" (launch announcement)
--
-- Methodology:
-- 1. For each (normalized_product_id, store_id), take the MOST RECENT
-- regular_price within the past 90 days ("current" price).
-- 2. Keep only products observed at 2+ distinct stores.
-- 3. For each product: savings_per_purchase = avg_price - min_price across stores.
-- 4. Annualise: multiply by an assumed purchase frequency of 26x/year
-- (~every 2 weeks for regularly purchased grocery items).
-- 5. Sum across all eligible products to get total annual savings potential.
--
-- Sensitivity:
-- Change the frequency constant (26) and lookback interval (90 days) to
-- explore how sensitive the $336 figure is to these assumptions.
--
-- Run against production Postgres once infrastructure is available.
-- =============================================================================
-- -----------------------------------------------------------------------------
-- Summary query: one row with the total annual savings potential.
--
-- Output columns:
--   eligible_product_count        products priced at 2+ stores in the window
--   avg_savings_per_purchase      mean (avg_price - cheapest_price) per product
--   total_annual_savings_26x_freq SUM(savings_per_purchase) * 26
--   total_annual_savings_20x_freq SUM(savings_per_purchase) * 20  (sensitivity)
--   total_annual_savings_52x_freq SUM(savings_per_purchase) * 52  (sensitivity)
--
-- All totals are derived from the same UNROUNDED per-product savings and are
-- rounded exactly once, in the final SELECT, so the three frequency columns
-- are exact multiples of one another (up to the final 2-decimal rounding).
-- When no product is eligible, the count is 0 and the AVG/SUM columns are NULL.
-- -----------------------------------------------------------------------------
-- Step 1: most-recent price per (product, store) within the past 90 days
WITH latest_prices AS (
    -- DISTINCT ON keeps the first row of each (product, store) group in
    -- ORDER BY order, i.e. the row with the latest observed_date.
    -- NOTE(review): if price_history can hold several rows for the same
    -- product/store/observed_date, the row picked among them is arbitrary;
    -- add a tie-breaker column to the ORDER BY if one exists -- TODO confirm.
    SELECT DISTINCT ON (ph.normalized_product_id, ph.store_id)
        ph.normalized_product_id,
        ph.store_id,
        s.slug AS store_slug,
        ph.regular_price AS current_price,
        ph.observed_date
    FROM price_history ph
    -- Inner join: price rows whose store_id has no matching stores row are dropped.
    JOIN stores s ON s.id = ph.store_id
    WHERE ph.observed_date >= CURRENT_DATE - INTERVAL '90 days'
      AND ph.regular_price > 0  -- also excludes NULL prices
    ORDER BY
        ph.normalized_product_id,
        ph.store_id,
        ph.observed_date DESC
),
-- Step 2: aggregate per product — only keep products seen at 2+ stores
product_price_spread AS (
    SELECT
        lp.normalized_product_id,
        COUNT(DISTINCT lp.store_id) AS store_count,
        MIN(lp.current_price) AS cheapest_price,
        AVG(lp.current_price) AS avg_price,
        MAX(lp.current_price) AS most_expensive_price,
        MAX(lp.current_price) - MIN(lp.current_price) AS price_range
    FROM latest_prices lp
    GROUP BY lp.normalized_product_id
    HAVING COUNT(DISTINCT lp.store_id) >= 2
),
-- Step 3: compute savings_per_purchase (kept unrounded; see header note)
-- Purchase frequency assumption: 26 purchases/year per product (~every 2 weeks),
-- applied in the final SELECT alongside the 20x and 52x sensitivity variants.
savings_per_product AS (
    SELECT
        pps.normalized_product_id,
        np.canonical_name,
        np.category,
        pps.store_count,
        pps.cheapest_price,
        pps.avg_price,
        pps.price_range,
        pps.avg_price - pps.cheapest_price AS savings_per_purchase
    FROM product_price_spread pps
    -- Inner join: only products that have a normalized_products row are counted.
    JOIN normalized_products np ON np.id = pps.normalized_product_id
)
-- Final summary: total annual savings potential
SELECT
    COUNT(*) AS eligible_product_count,
    ROUND(AVG(savings_per_purchase), 4) AS avg_savings_per_purchase,
    ROUND(SUM(savings_per_purchase) * 26, 2) AS total_annual_savings_26x_freq,
    -- Sensitivity: alternative frequencies
    ROUND(SUM(savings_per_purchase) * 20, 2) AS total_annual_savings_20x_freq,
    ROUND(SUM(savings_per_purchase) * 52, 2) AS total_annual_savings_52x_freq
FROM savings_per_product;
-- Per-product detail (top 50 by annual savings opportunity)
-- The latest_prices and product_price_spread CTEs are repeated here because a
-- WITH clause is scoped to the single statement it belongs to.
WITH latest_prices AS (
    -- Most recent positive regular_price per (product, store) in the past 90 days.
    -- NOTE(review): if price_history can hold several rows for the same
    -- product/store/observed_date, the row picked among them is arbitrary;
    -- add a tie-breaker column to the ORDER BY if one exists -- TODO confirm.
    SELECT DISTINCT ON (ph.normalized_product_id, ph.store_id)
        ph.normalized_product_id,
        ph.store_id,
        s.slug AS store_slug,
        ph.regular_price AS current_price,
        ph.observed_date
    FROM price_history ph
    -- Inner join: price rows whose store_id has no matching stores row are dropped.
    JOIN stores s ON s.id = ph.store_id
    WHERE ph.observed_date >= CURRENT_DATE - INTERVAL '90 days'
      AND ph.regular_price > 0
    ORDER BY ph.normalized_product_id, ph.store_id, ph.observed_date DESC
),
-- Per-product price spread; only products priced at 2+ distinct stores.
product_price_spread AS (
    SELECT
        lp.normalized_product_id,
        COUNT(DISTINCT lp.store_id) AS store_count,
        MIN(lp.current_price) AS cheapest_price,
        AVG(lp.current_price) AS avg_price
    FROM latest_prices lp
    GROUP BY lp.normalized_product_id
    HAVING COUNT(DISTINCT lp.store_id) >= 2
)
SELECT
    np.canonical_name,
    np.category,
    np.brand,
    np.size,
    np.size_unit,
    pps.store_count,
    pps.cheapest_price,
    ROUND(pps.avg_price, 2) AS avg_price,
    ROUND(pps.avg_price - pps.cheapest_price, 2) AS savings_per_purchase,
    -- 26 = assumed purchases per year (~every 2 weeks)
    ROUND((pps.avg_price - pps.cheapest_price) * 26, 2) AS annual_savings_at_26x
FROM product_price_spread pps
JOIN normalized_products np ON np.id = pps.normalized_product_id
-- annual_savings_at_26x is rounded and therefore not unique; the product id
-- tie-breaker makes the set of rows kept by LIMIT 50 repeatable between runs.
ORDER BY
    annual_savings_at_26x DESC,
    pps.normalized_product_id
LIMIT 50;