Squashed 'receiptwitness/' content from commit e8d374a

git-subtree-dir: receiptwitness
git-subtree-split: e8d374a89ed8978f429598e02d31b1c5963efe22
This commit is contained in:
Coupon Carl
2026-03-28 02:24:22 +00:00
commit 342906c9d1
53 changed files with 7300 additions and 0 deletions
View File
+58
View File
@@ -0,0 +1,58 @@
"""Tests for the base scraper class."""
from datetime import datetime
from unittest.mock import patch
import pytest
from receiptwitness.scrapers.base import BaseScraper, RawReceipt, SessionData
class ConcreteScraper(BaseScraper):
    """Minimal BaseScraper subclass used to exercise the abstract base in tests."""

    async def login(self, username, password):
        """Return a stub session with no cookies; the credentials are not used."""
        stub_session = SessionData(
            cookies=[],
            user_agent="test",
            created_at=datetime.now(),
        )
        return stub_session

    async def check_session(self, session):
        """Report every session as valid."""
        return True

    async def scrape_receipts(self, session, since=None):
        """Return an empty list of receipts."""
        return []

    def parse_receipt(self, raw):
        """Return an empty dict."""
        return {}
class TestBaseScraper:
    """Checks on BaseScraper.human_delay and the RawReceipt / SessionData containers."""

    @pytest.mark.asyncio
    async def test_human_delay_respects_bounds(self):
        """The value handed to asyncio.sleep for a 100-200 ms delay lies in [0.1, 0.2]."""
        subject = ConcreteScraper()
        with patch("receiptwitness.scrapers.base.asyncio.sleep") as fake_sleep:
            fake_sleep.return_value = None
            await subject.human_delay(min_ms=100, max_ms=200)
            # call_args[0] is the tuple of positional arguments of the last call.
            positional = fake_sleep.call_args[0]
            slept_for = positional[0]
            assert 0.1 <= slept_for <= 0.2

    def test_raw_receipt_dataclass(self):
        """RawReceipt keeps the receipt id and raw payload it was built with."""
        payload = {"key": "value"}
        receipt = RawReceipt(
            receipt_id="test-123",
            purchase_date="2026-03-10",
            store_number="42",
            raw_data=payload,
        )
        assert receipt.receipt_id == "test-123"
        assert receipt.raw_data == {"key": "value"}

    def test_session_data_defaults(self):
        """SessionData defaults expires_at to None and extra to an empty dict."""
        bare_session = SessionData(
            cookies=[],
            user_agent="test",
            created_at=datetime.now(),
        )
        assert bare_session.expires_at is None
        assert bare_session.extra == {}
+574
View File
@@ -0,0 +1,574 @@
"""Tests for the Kroger scraper.
These tests mock Playwright to avoid requiring real Kroger credentials
or network access. They verify the scraper's control flow, session handling,
date filtering, and error resilience.
"""
from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from receiptwitness.scrapers.base import RawReceipt, SessionData
from receiptwitness.scrapers.kroger import (
DEFAULT_TIMEZONE,
DEFAULT_USER_AGENT,
DEFAULT_VIEWPORT,
KROGER_BASE,
KROGER_LOGIN_PAGE,
KROGER_PURCHASE_HISTORY,
KrogerScraper,
)
@pytest.fixture
def scraper():
    """Provide a newly constructed KrogerScraper."""
    instance = KrogerScraper()
    return instance
@pytest.fixture
def valid_session():
    """Provide a Kroger session created now that expires two hours from now."""
    session_cookie = {"name": "session", "value": "abc123", "domain": ".kroger.com", "path": "/"}
    created = datetime.now(UTC)
    expiry = datetime.now(UTC) + timedelta(hours=2)
    return SessionData(
        cookies=[session_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=created,
        expires_at=expiry,
        extra={"retailer": "kroger"},
    )
@pytest.fixture
def expired_session():
    """Provide a Kroger session created four hours ago that expired two hours ago."""
    stale_cookie = {"name": "session", "value": "expired", "domain": ".kroger.com", "path": "/"}
    created = datetime.now(UTC) - timedelta(hours=4)
    expiry = datetime.now(UTC) - timedelta(hours=2)
    return SessionData(
        cookies=[stale_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=created,
        expires_at=expiry,
    )
class TestKrogerScraperConstants:
    """Pin the module-level constants imported from the Kroger scraper."""

    def test_base_url(self):
        expected = "https://www.kroger.com"
        assert KROGER_BASE == expected

    def test_login_page(self):
        expected = "https://www.kroger.com/signin"
        assert KROGER_LOGIN_PAGE == expected

    def test_purchase_history_page(self):
        expected = "https://www.kroger.com/mypurchases"
        assert KROGER_PURCHASE_HISTORY == expected

    def test_default_user_agent_is_chrome(self):
        # Both markers must appear in the user-agent string.
        for marker in ("Chrome", "Windows"):
            assert marker in DEFAULT_USER_AGENT

    def test_default_viewport_hd(self):
        expected_viewport = {"width": 1920, "height": 1080}
        assert DEFAULT_VIEWPORT == expected_viewport

    def test_default_timezone(self):
        expected = "America/New_York"
        assert DEFAULT_TIMEZONE == expected
class TestCheckSession:
    """Tests for KrogerScraper.check_session.

    The browser-backed tests share one mocked Playwright stack and differ only
    in the URL the page reports, so that scaffolding lives in private helpers
    instead of being repeated in every test.
    """

    @staticmethod
    def _session_without_expiry():
        """Build a cookie-less session whose ``expires_at`` is None."""
        return SessionData(
            cookies=[],
            user_agent=DEFAULT_USER_AGENT,
            created_at=datetime.now(UTC),
            expires_at=None,
        )

    @staticmethod
    async def _check_with_landing_url(scraper, session, landing_url):
        """Run ``scraper.check_session(session)`` against a mocked Playwright stack.

        Args:
            scraper: The scraper under test.
            session: The SessionData passed to ``check_session``.
            landing_url: Value exposed as ``page.url`` on the mocked page.

        Returns:
            Whatever ``check_session`` returns.
        """
        mock_page = AsyncMock()
        mock_page.url = landing_url
        mock_response = MagicMock()
        mock_response.ok = True
        mock_page.goto = AsyncMock(return_value=mock_response)

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()

        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw:
            # async_playwright() is used as an async context manager yielding mock_pw.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm
            return await scraper.check_session(session)

    @pytest.mark.asyncio
    async def test_expired_session_returns_false(self, scraper, expired_session):
        """A session whose expires_at is in the past is reported as invalid."""
        result = await scraper.check_session(expired_session)
        assert result is False

    @pytest.mark.asyncio
    async def test_no_expiry_checks_via_browser(self, scraper):
        """With no expiry set, a page that stays on the dashboard URL means valid."""
        result = await self._check_with_landing_url(
            scraper,
            self._session_without_expiry(),
            "https://www.kroger.com/account/dashboard",
        )
        assert result is True

    @pytest.mark.asyncio
    async def test_session_redirected_to_signin_returns_false(self, scraper):
        """With no expiry set, a page that ends up on the sign-in URL means invalid."""
        result = await self._check_with_landing_url(
            scraper,
            self._session_without_expiry(),
            "https://www.kroger.com/signin?redirectUrl=account",
        )
        assert result is False
class TestLogin:
    """Tests for KrogerScraper.login with the Playwright browser stack mocked."""

    @pytest.mark.asyncio
    async def test_login_returns_session_data(self, scraper):
        """login() returns a SessionData carrying the cookies reported by the browser context."""
        # page.locator() hands out these three mocks, in this order, on successive calls.
        email_locator, password_locator, button_locator = AsyncMock(), AsyncMock(), AsyncMock()
        page = AsyncMock()
        page.url = "https://www.kroger.com/"
        page.locator = MagicMock(side_effect=[email_locator, password_locator, button_locator])
        page.wait_for_url = AsyncMock()

        browser_cookies = [
            {"name": "kroger_session", "value": "test123", "domain": ".kroger.com", "path": "/"}
        ]
        context = AsyncMock()
        context.new_page = AsyncMock(return_value=page)
        context.cookies = AsyncMock(return_value=browser_cookies)
        context.add_cookies = AsyncMock()
        context.add_init_script = AsyncMock()

        browser = AsyncMock()
        browser.new_context = AsyncMock(return_value=context)
        context.browser = browser

        playwright = AsyncMock()
        playwright.chromium.launch = AsyncMock(return_value=browser)

        # Stand-in for the async context manager returned by async_playwright().
        playwright_cm = AsyncMock()
        playwright_cm.__aenter__ = AsyncMock(return_value=playwright)
        playwright_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch("receiptwitness.scrapers.kroger.async_playwright") as fake_async_playwright,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            fake_async_playwright.return_value = playwright_cm
            session = await scraper.login("user@test.com", "password123")

        assert isinstance(session, SessionData)
        assert len(session.cookies) == 1
        first_cookie = session.cookies[0]
        assert first_cookie["name"] == "kroger_session"
        assert session.user_agent == DEFAULT_USER_AGENT
        assert session.expires_at is not None
        assert session.extra == {"retailer": "kroger"}
class TestScrapeReceipts:
    """Tests for KrogerScraper.scrape_receipts with Playwright mocked out.

    Every test drives the scraper through the same mocked
    playwright -> browser -> context -> page chain; only the responses served
    by ``page.request.get`` differ. That shared scaffolding lives in
    ``_scrape`` so each test states just its payloads and expectations.
    """

    @staticmethod
    def _ok_json_response(payload):
        """Build a mocked response with ``ok`` True whose ``json()`` resolves to *payload*."""
        response = AsyncMock()
        response.ok = True
        response.json = AsyncMock(return_value=payload)
        return response

    @staticmethod
    async def _scrape(scraper, session, request_get, **scrape_kwargs):
        """Call ``scraper.scrape_receipts`` against a fully mocked Playwright stack.

        Args:
            scraper: The scraper under test.
            session: The SessionData passed to ``scrape_receipts``.
            request_get: Mock installed as ``page.request.get``; its
                ``side_effect`` / ``return_value`` supplies the API responses.
            **scrape_kwargs: Extra keyword arguments forwarded to
                ``scrape_receipts`` (for example ``since=``).

        Returns:
            The list returned by ``scrape_receipts``.
        """
        mock_request = AsyncMock()
        mock_request.get = request_get

        mock_page = AsyncMock()
        mock_page.goto = AsyncMock()
        mock_page.request = mock_request

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()

        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with (
            patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            # async_playwright() is used as an async context manager yielding mock_pw.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm
            return await scraper.scrape_receipts(session, **scrape_kwargs)

    @pytest.mark.asyncio
    async def test_scrape_returns_receipts(self, scraper, valid_session):
        """Two listed orders come back as two RawReceipt objects in listing order."""
        mock_api_response = self._ok_json_response(
            {
                "orders": [
                    {
                        "orderId": "KR-001",
                        "purchaseDate": "2026-03-10T14:00:00Z",
                        "storeNumber": "357",
                    },
                    {
                        "orderId": "KR-002",
                        "purchaseDate": "2026-03-11T10:00:00Z",
                        "storeNumber": "357",
                    },
                ]
            }
        )
        mock_api_response.status = 200
        mock_detail_response = self._ok_json_response({"items": []})

        # First get() serves the listing, the next two serve one detail per order.
        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(
                side_effect=[mock_api_response, mock_detail_response, mock_detail_response]
            ),
        )

        assert len(receipts) == 2
        assert receipts[0].receipt_id == "KR-001"
        assert receipts[1].receipt_id == "KR-002"
        assert isinstance(receipts[0], RawReceipt)

    @pytest.mark.asyncio
    async def test_scrape_filters_by_date(self, scraper, valid_session):
        """Orders dated before ``since`` are left out of the result."""
        mock_api_response = self._ok_json_response(
            {
                "orders": [
                    {
                        "orderId": "KR-OLD",
                        "purchaseDate": "2026-01-01T10:00:00Z",
                        "storeNumber": "357",
                    },
                    {
                        "orderId": "KR-NEW",
                        "purchaseDate": "2026-03-15T10:00:00Z",
                        "storeNumber": "357",
                    },
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})
        since = datetime(2026, 3, 1, tzinfo=UTC)

        # Only one detail response is queued after the listing.
        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
            since=since,
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-NEW"

    @pytest.mark.asyncio
    async def test_scrape_handles_api_failure(self, scraper, valid_session):
        """A non-ok listing response yields an empty result."""
        mock_api_response = AsyncMock()
        mock_api_response.ok = False
        mock_api_response.status = 500
        mock_api_response.status_text = "Internal Server Error"

        receipts = await self._scrape(
            scraper, valid_session, AsyncMock(return_value=mock_api_response)
        )

        assert receipts == []

    @pytest.mark.asyncio
    async def test_scrape_handles_unexpected_response(self, scraper, valid_session):
        """A listing whose JSON body is not a dict yields an empty result."""
        mock_api_response = self._ok_json_response("not a dict")

        receipts = await self._scrape(
            scraper, valid_session, AsyncMock(return_value=mock_api_response)
        )

        assert receipts == []

    @pytest.mark.asyncio
    async def test_scrape_alternative_field_names(self, scraper, valid_session):
        """Kroger may use 'purchases' instead of 'orders'."""
        mock_api_response = self._ok_json_response(
            {
                "purchases": [
                    {
                        "receiptId": "KR-ALT-001",
                        "transactionDate": "2026-03-10T14:00:00Z",
                        "divisionNumber": "014",
                    }
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-ALT-001"

    @pytest.mark.asyncio
    async def test_scrape_skips_orders_without_id(self, scraper, valid_session):
        """An order with no id field is skipped; the order with an id is kept."""
        mock_api_response = self._ok_json_response(
            {
                "orders": [
                    {"purchaseDate": "2026-03-10T14:00:00Z"},  # no id
                    {"orderId": "KR-VALID", "purchaseDate": "2026-03-10T14:00:00Z"},
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-VALID"

    @pytest.mark.asyncio
    async def test_scrape_skips_orders_with_null_id(self, scraper, valid_session):
        """Ensure orderId: null doesn't produce receipt_id='None' (str(None) bug)."""
        mock_api_response = self._ok_json_response(
            {
                "orders": [
                    {"orderId": None, "receiptId": None, "purchaseDate": "2026-03-10T14:00:00Z"},
                    {"orderId": "KR-REAL", "purchaseDate": "2026-03-10T14:00:00Z"},
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-REAL"
        # Verify no receipt has the string "None" as its ID
        assert all(r.receipt_id != "None" for r in receipts)
class TestParseReceipt:
    """Tests for KrogerScraper.parse_receipt on hand-built RawReceipt inputs."""

    def test_parse_receipt_delegates_to_parser(self, scraper):
        """A receipt whose detail holds one item parses to one item under the same id."""
        line_item = {
            "description": "TEST ITEM",
            "basePrice": 5.00,
            "totalPrice": 5.00,
        }
        detail = {"items": [line_item], "total": 5.00}
        raw = RawReceipt(
            receipt_id="KR-001",
            purchase_date="2026-03-12",
            raw_data={"detail": detail},
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "KR-001"
        assert len(parsed["items"]) == 1

    def test_receipt_detail_failure_returns_empty(self, scraper):
        """Verify receipt detail failures produce empty detail."""
        payload = {"total": 10.00, "detail": {}}
        raw = RawReceipt(
            receipt_id="KR-FAIL",
            purchase_date="2026-03-12",
            raw_data=payload,
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "KR-FAIL"
        assert parsed["items"] == []
+585
View File
@@ -0,0 +1,585 @@
"""Tests for the Meijer scraper.
These tests mock Playwright to avoid requiring real Meijer credentials
or network access. They verify the scraper's control flow, session handling,
date filtering, and error resilience.
"""
from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from receiptwitness.scrapers.base import RawReceipt, SessionData
from receiptwitness.scrapers.meijer import (
DEFAULT_TIMEZONE,
DEFAULT_USER_AGENT,
DEFAULT_VIEWPORT,
MEIJER_BASE,
MEIJER_LOGIN_PAGE,
MEIJER_MPERKS_HOME,
MEIJER_PURCHASE_HISTORY,
MeijerScraper,
)
@pytest.fixture
def scraper():
    """Provide a newly constructed MeijerScraper."""
    instance = MeijerScraper()
    return instance
@pytest.fixture
def valid_session():
    """Provide a Meijer session created now that expires four hours from now."""
    session_cookie = {
        "name": "meijer_session",
        "value": "abc123",
        "domain": ".meijer.com",
        "path": "/",
    }
    created = datetime.now(UTC)
    expiry = datetime.now(UTC) + timedelta(hours=4)
    return SessionData(
        cookies=[session_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=created,
        expires_at=expiry,
    )
@pytest.fixture
def expired_session():
    """Provide a Meijer session created eight hours ago that expired four hours ago."""
    stale_cookie = {
        "name": "meijer_session",
        "value": "expired",
        "domain": ".meijer.com",
        "path": "/",
    }
    created = datetime.now(UTC) - timedelta(hours=8)
    expiry = datetime.now(UTC) - timedelta(hours=4)
    return SessionData(
        cookies=[stale_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=created,
        expires_at=expiry,
    )
class TestMeijerScraperConstants:
    """Pin the module-level constants imported from the Meijer scraper."""

    def test_base_url(self):
        expected = "https://www.meijer.com"
        assert MEIJER_BASE == expected

    def test_login_page(self):
        expected = "https://www.meijer.com/shopping/login.html"
        assert MEIJER_LOGIN_PAGE == expected

    def test_mperks_home(self):
        expected = "https://www.meijer.com/mperks.html"
        assert MEIJER_MPERKS_HOME == expected

    def test_purchase_history_url(self):
        expected = "https://www.meijer.com/bin/meijer/profile/purchasehistory"
        assert MEIJER_PURCHASE_HISTORY == expected

    def test_default_user_agent_is_chrome(self):
        # Both markers must appear in the user-agent string.
        for marker in ("Chrome", "Windows"):
            assert marker in DEFAULT_USER_AGENT

    def test_default_viewport_hd(self):
        expected_viewport = {"width": 1920, "height": 1080}
        assert DEFAULT_VIEWPORT == expected_viewport

    def test_default_timezone(self):
        expected = "America/Detroit"
        assert DEFAULT_TIMEZONE == expected
class TestCheckSession:
    """Tests for MeijerScraper.check_session.

    The browser-backed tests share one mocked Playwright stack and differ only
    in the URL the page reports, so that scaffolding lives in private helpers
    instead of being repeated in every test.
    """

    @staticmethod
    def _session_without_expiry():
        """Build a cookie-less session whose ``expires_at`` is None."""
        return SessionData(
            cookies=[],
            user_agent=DEFAULT_USER_AGENT,
            created_at=datetime.now(UTC),
            expires_at=None,
        )

    @staticmethod
    async def _check_with_landing_url(scraper, session, landing_url):
        """Run ``scraper.check_session(session)`` against a mocked Playwright stack.

        Args:
            scraper: The scraper under test.
            session: The SessionData passed to ``check_session``.
            landing_url: Value exposed as ``page.url`` on the mocked page.

        Returns:
            Whatever ``check_session`` returns.
        """
        mock_page = AsyncMock()
        mock_page.url = landing_url
        mock_response = MagicMock()
        mock_response.ok = True
        mock_page.goto = AsyncMock(return_value=mock_response)

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()

        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with patch("receiptwitness.scrapers.meijer.async_playwright") as mock_apw:
            # async_playwright() is used as an async context manager yielding mock_pw.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm
            return await scraper.check_session(session)

    @pytest.mark.asyncio
    async def test_expired_session_returns_false(self, scraper, expired_session):
        """A session whose expires_at is in the past is reported as invalid."""
        result = await scraper.check_session(expired_session)
        assert result is False

    @pytest.mark.asyncio
    async def test_no_expiry_checks_via_browser(self, scraper):
        """With no expiry set, a page that stays on the mPerks URL means valid."""
        result = await self._check_with_landing_url(
            scraper,
            self._session_without_expiry(),
            "https://www.meijer.com/mperks.html",
        )
        assert result is True

    @pytest.mark.asyncio
    async def test_session_redirected_to_login_returns_false(self, scraper):
        """With no expiry set, a page that ends up on the login URL means invalid."""
        result = await self._check_with_landing_url(
            scraper,
            self._session_without_expiry(),
            "https://www.meijer.com/shopping/login.html?redirect=mperks",
        )
        assert result is False
class TestLogin:
    """Tests for MeijerScraper.login with the Playwright browser stack mocked."""

    @pytest.mark.asyncio
    async def test_login_returns_session_data(self, scraper):
        """login() returns a SessionData carrying the cookies reported by the browser context."""
        # page.locator() hands out these three mocks, in this order, on successive calls.
        email_locator, password_locator, button_locator = AsyncMock(), AsyncMock(), AsyncMock()
        page = AsyncMock()
        page.url = "https://www.meijer.com/mperks.html"
        page.locator = MagicMock(side_effect=[email_locator, password_locator, button_locator])
        page.wait_for_url = AsyncMock()

        browser_cookies = [
            {"name": "meijer_session", "value": "test456", "domain": ".meijer.com", "path": "/"}
        ]
        context = AsyncMock()
        context.new_page = AsyncMock(return_value=page)
        context.cookies = AsyncMock(return_value=browser_cookies)
        context.add_cookies = AsyncMock()
        context.add_init_script = AsyncMock()

        browser = AsyncMock()
        browser.new_context = AsyncMock(return_value=context)
        context.browser = browser

        playwright = AsyncMock()
        playwright.chromium.launch = AsyncMock(return_value=browser)

        # Stand-in for the async context manager returned by async_playwright().
        playwright_cm = AsyncMock()
        playwright_cm.__aenter__ = AsyncMock(return_value=playwright)
        playwright_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch("receiptwitness.scrapers.meijer.async_playwright") as fake_async_playwright,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            fake_async_playwright.return_value = playwright_cm
            session = await scraper.login("user@test.com", "password123")

        assert isinstance(session, SessionData)
        assert len(session.cookies) == 1
        first_cookie = session.cookies[0]
        assert first_cookie["name"] == "meijer_session"
        assert session.user_agent == DEFAULT_USER_AGENT
        assert session.expires_at is not None
        # Meijer sessions last 4 hours
        earliest_expected_expiry = session.created_at + timedelta(hours=3)
        assert session.expires_at > earliest_expected_expiry
class TestScrapeReceipts:
    """Tests for MeijerScraper.scrape_receipts with Playwright mocked out.

    Every test drives the scraper through the same mocked
    playwright -> browser -> context -> page chain; only the responses served
    by ``page.request.get`` differ. That shared scaffolding lives in
    ``_scrape`` so each test states just its payloads and expectations.
    """

    @staticmethod
    def _ok_json_response(payload):
        """Build a mocked response with ``ok`` True whose ``json()`` resolves to *payload*."""
        response = AsyncMock()
        response.ok = True
        response.json = AsyncMock(return_value=payload)
        return response

    @staticmethod
    async def _scrape(scraper, session, request_get, **scrape_kwargs):
        """Call ``scraper.scrape_receipts`` against a fully mocked Playwright stack.

        Args:
            scraper: The scraper under test.
            session: The SessionData passed to ``scrape_receipts``.
            request_get: Mock installed as ``page.request.get``; its
                ``side_effect`` / ``return_value`` supplies the API responses.
            **scrape_kwargs: Extra keyword arguments forwarded to
                ``scrape_receipts`` (for example ``since=``).

        Returns:
            The list returned by ``scrape_receipts``.
        """
        mock_request = AsyncMock()
        mock_request.get = request_get

        mock_page = AsyncMock()
        mock_page.goto = AsyncMock()
        mock_page.request = mock_request

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()

        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with (
            patch("receiptwitness.scrapers.meijer.async_playwright") as mock_apw,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            # async_playwright() is used as an async context manager yielding mock_pw.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm
            return await scraper.scrape_receipts(session, **scrape_kwargs)

    @pytest.mark.asyncio
    async def test_scrape_returns_receipts(self, scraper, valid_session):
        """Two listed transactions come back as two RawReceipt objects in listing order."""
        mock_api_response = self._ok_json_response(
            {
                "transactions": [
                    {
                        "transactionId": "TXN-001",
                        "transactionDate": "2026-03-10T14:00:00Z",
                        "storeNumber": "42",
                    },
                    {
                        "transactionId": "TXN-002",
                        "transactionDate": "2026-03-11T10:00:00Z",
                        "storeNumber": "42",
                    },
                ]
            }
        )
        mock_api_response.status = 200
        mock_detail_response = self._ok_json_response({"items": []})

        # First get() serves the listing, the next two serve one detail per transaction.
        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(
                side_effect=[mock_api_response, mock_detail_response, mock_detail_response]
            ),
        )

        assert len(receipts) == 2
        assert receipts[0].receipt_id == "TXN-001"
        assert receipts[1].receipt_id == "TXN-002"
        assert isinstance(receipts[0], RawReceipt)

    @pytest.mark.asyncio
    async def test_scrape_filters_by_date(self, scraper, valid_session):
        """Transactions dated before ``since`` are left out of the result."""
        mock_api_response = self._ok_json_response(
            {
                "transactions": [
                    {
                        "transactionId": "TXN-OLD",
                        "transactionDate": "2026-01-01T10:00:00Z",
                        "storeNumber": "42",
                    },
                    {
                        "transactionId": "TXN-NEW",
                        "transactionDate": "2026-03-15T10:00:00Z",
                        "storeNumber": "42",
                    },
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})
        since = datetime(2026, 3, 1, tzinfo=UTC)

        # Only one detail response is queued after the listing.
        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
            since=since,
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "TXN-NEW"

    @pytest.mark.asyncio
    async def test_scrape_handles_api_failure(self, scraper, valid_session):
        """A non-ok listing response yields an empty result."""
        mock_api_response = AsyncMock()
        mock_api_response.ok = False
        mock_api_response.status = 500
        mock_api_response.status_text = "Internal Server Error"

        receipts = await self._scrape(
            scraper, valid_session, AsyncMock(return_value=mock_api_response)
        )

        assert receipts == []

    @pytest.mark.asyncio
    async def test_scrape_handles_unexpected_response(self, scraper, valid_session):
        """A listing whose JSON body is not a dict yields an empty result."""
        mock_api_response = self._ok_json_response("not a dict")

        receipts = await self._scrape(
            scraper, valid_session, AsyncMock(return_value=mock_api_response)
        )

        assert receipts == []

    @pytest.mark.asyncio
    async def test_scrape_alternative_field_names(self, scraper, valid_session):
        """Meijer may use 'purchaseHistory' instead of 'transactions'."""
        mock_api_response = self._ok_json_response(
            {
                "purchaseHistory": [
                    {
                        "receiptId": "MJ-ALT-001",
                        "purchaseDate": "2026-03-10T14:00:00Z",
                        "storeId": "99",
                    }
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "MJ-ALT-001"

    @pytest.mark.asyncio
    async def test_scrape_skips_transactions_without_id(self, scraper, valid_session):
        """A transaction with no id field is skipped; the one with an id is kept."""
        mock_api_response = self._ok_json_response(
            {
                "transactions": [
                    {"transactionDate": "2026-03-10T14:00:00Z"},  # no id
                    {"transactionId": "TXN-VALID", "transactionDate": "2026-03-10T14:00:00Z"},
                ]
            }
        )
        mock_detail_response = self._ok_json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "TXN-VALID"

    @pytest.mark.asyncio
    async def test_scrape_receipt_detail_failure_returns_empty_detail(self, scraper, valid_session):
        """Receipt detail API failure should not crash the scraper."""
        mock_api_response = self._ok_json_response(
            {
                "transactions": [
                    {
                        "transactionId": "TXN-DETAIL-FAIL",
                        "transactionDate": "2026-03-10T14:00:00Z",
                        "storeNumber": "42",
                    }
                ]
            }
        )
        # The detail call fails: ok is False and no json payload is configured.
        mock_detail_response = AsyncMock()
        mock_detail_response.ok = False
        mock_detail_response.status = 404

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "TXN-DETAIL-FAIL"
        assert receipts[0].raw_data.get("detail") == {}
class TestParseReceipt:
    """Tests for MeijerScraper.parse_receipt on hand-built RawReceipt inputs."""

    def test_parse_receipt_delegates_to_parser(self, scraper):
        """A receipt whose detail holds one item parses to one item under the same id."""
        line_item = {
            "description": "TEST ITEM",
            "price": 5.00,
            "extendedPrice": 5.00,
        }
        detail = {"items": [line_item], "total": 5.00}
        raw = RawReceipt(
            receipt_id="TXN-001",
            purchase_date="2026-03-10",
            raw_data={"detail": detail},
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "TXN-001"
        assert len(parsed["items"]) == 1

    def test_receipt_detail_failure_returns_empty(self, scraper):
        """A receipt with an empty detail dict parses to an empty item list."""
        payload = {"total": 10.00, "detail": {}}
        raw = RawReceipt(
            receipt_id="TXN-FAIL",
            purchase_date="2026-03-10",
            raw_data=payload,
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "TXN-FAIL"
        assert parsed["items"] == []