forked from cartsnitch/cartsnitch
Squashed 'receiptwitness/' content from commit e8d374a
git-subtree-dir: receiptwitness git-subtree-split: e8d374a89ed8978f429598e02d31b1c5963efe22
This commit is contained in:
@@ -0,0 +1,58 @@
|
||||
"""Tests for the base scraper class."""
|
||||
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from receiptwitness.scrapers.base import BaseScraper, RawReceipt, SessionData
|
||||
|
||||
|
||||
class ConcreteScraper(BaseScraper):
    """Stub subclass that lets the tests instantiate ``BaseScraper``.

    Each method returns a fixed placeholder and performs no I/O.
    """

    async def login(self, username, password):
        """Return a cookie-less session; both credentials are ignored."""
        stub_session = SessionData(cookies=[], user_agent="test", created_at=datetime.now())
        return stub_session

    async def check_session(self, session):
        """Report every session as valid."""
        return True

    async def scrape_receipts(self, session, since=None):
        """Return an empty receipt list regardless of the arguments."""
        return []

    def parse_receipt(self, raw):
        """Return an empty mapping regardless of the raw receipt."""
        return {}
|
||||
|
||||
|
||||
class TestBaseScraper:
    """Checks for ``BaseScraper.human_delay`` and the two shared dataclasses."""

    @pytest.mark.asyncio
    async def test_human_delay_respects_bounds(self):
        """A 100-200 ms delay request sleeps for 0.1-0.2 seconds."""
        stub = ConcreteScraper()
        with patch("receiptwitness.scrapers.base.asyncio.sleep") as fake_sleep:
            fake_sleep.return_value = None
            await stub.human_delay(min_ms=100, max_ms=200)
            # First positional argument of the most recent sleep() call.
            slept_seconds = fake_sleep.call_args.args[0]
            assert 0.1 <= slept_seconds <= 0.2

    def test_raw_receipt_dataclass(self):
        """``RawReceipt`` stores the values passed to its constructor."""
        payload = {"key": "value"}
        receipt = RawReceipt(
            receipt_id="test-123",
            purchase_date="2026-03-10",
            store_number="42",
            raw_data=payload,
        )
        assert receipt.receipt_id == "test-123"
        assert receipt.raw_data == {"key": "value"}

    def test_session_data_defaults(self):
        """Omitted optional fields default to ``None`` and an empty dict."""
        required_only = {"cookies": [], "user_agent": "test", "created_at": datetime.now()}
        session = SessionData(**required_only)
        assert session.expires_at is None
        assert session.extra == {}
|
||||
@@ -0,0 +1,574 @@
|
||||
"""Tests for the Kroger scraper.
|
||||
|
||||
These tests mock Playwright to avoid requiring real Kroger credentials
|
||||
or network access. They verify the scraper's control flow, session handling,
|
||||
date filtering, and error resilience.
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from receiptwitness.scrapers.base import RawReceipt, SessionData
|
||||
from receiptwitness.scrapers.kroger import (
|
||||
DEFAULT_TIMEZONE,
|
||||
DEFAULT_USER_AGENT,
|
||||
DEFAULT_VIEWPORT,
|
||||
KROGER_BASE,
|
||||
KROGER_LOGIN_PAGE,
|
||||
KROGER_PURCHASE_HISTORY,
|
||||
KrogerScraper,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def scraper():
    """Provide a new ``KrogerScraper`` built with default arguments."""
    kroger_scraper = KrogerScraper()
    return kroger_scraper
|
||||
|
||||
|
||||
@pytest.fixture
def valid_session():
    """Provide a Kroger session created now that expires two hours from now."""
    session_cookie = {
        "name": "session",
        "value": "abc123",
        "domain": ".kroger.com",
        "path": "/",
    }
    return SessionData(
        cookies=[session_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=datetime.now(UTC),
        expires_at=datetime.now(UTC) + timedelta(hours=2),
        extra={"retailer": "kroger"},
    )
|
||||
|
||||
|
||||
@pytest.fixture
def expired_session():
    """Provide a Kroger session created four hours ago that expired two hours ago."""
    stale_cookie = {
        "name": "session",
        "value": "expired",
        "domain": ".kroger.com",
        "path": "/",
    }
    return SessionData(
        cookies=[stale_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=datetime.now(UTC) - timedelta(hours=4),
        expires_at=datetime.now(UTC) - timedelta(hours=2),
    )
|
||||
|
||||
|
||||
class TestKrogerScraperConstants:
    """Pin the module-level constants exported by the Kroger scraper."""

    def test_base_url(self):
        """The base URL is the Kroger site root."""
        expected = "https://www.kroger.com"
        assert KROGER_BASE == expected

    def test_login_page(self):
        """The login page is the ``/signin`` path."""
        expected = "https://www.kroger.com/signin"
        assert KROGER_LOGIN_PAGE == expected

    def test_purchase_history_page(self):
        """The purchase history page is the ``/mypurchases`` path."""
        expected = "https://www.kroger.com/mypurchases"
        assert KROGER_PURCHASE_HISTORY == expected

    def test_default_user_agent_is_chrome(self):
        """The default user agent mentions both Chrome and Windows."""
        user_agent = DEFAULT_USER_AGENT
        assert "Chrome" in user_agent
        assert "Windows" in user_agent

    def test_default_viewport_hd(self):
        """The default viewport is 1920x1080."""
        assert DEFAULT_VIEWPORT == dict(width=1920, height=1080)

    def test_default_timezone(self):
        """The default timezone is ``America/New_York``."""
        expected = "America/New_York"
        assert DEFAULT_TIMEZONE == expected
|
||||
|
||||
|
||||
class TestCheckSession:
    """Tests for ``KrogerScraper.check_session``.

    The browser-backed tests share one mocked Playwright stack, built by
    ``_check_session_landing_on``; they differ only in the URL the mocked
    page reports and in the expected result.
    """

    @staticmethod
    async def _check_session_landing_on(scraper, landing_url):
        """Run ``check_session`` against a mocked browser and return its result.

        Args:
            scraper: The scraper under test.
            landing_url: Value exposed as ``page.url`` on the mocked page.

        Returns:
            Whatever ``scraper.check_session`` returns for a session that has
            no cookies and no ``expires_at``.
        """
        session = SessionData(
            cookies=[],
            user_agent=DEFAULT_USER_AGENT,
            created_at=datetime.now(UTC),
            expires_at=None,
        )

        mock_page = AsyncMock()
        mock_page.url = landing_url
        mock_response = MagicMock()
        mock_response.ok = True
        mock_page.goto = AsyncMock(return_value=mock_response)

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()
        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw:
            # async_playwright() is used as an async context manager.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm

            return await scraper.check_session(session)

    @pytest.mark.asyncio
    async def test_expired_session_returns_false(self, scraper, expired_session):
        """A session whose ``expires_at`` is in the past is reported invalid."""
        # No Playwright patch is installed here, unlike the tests below.
        result = await scraper.check_session(expired_session)
        assert result is False

    @pytest.mark.asyncio
    async def test_no_expiry_checks_via_browser(self, scraper):
        """Without an expiry, a page that stays on an account URL means valid."""
        result = await self._check_session_landing_on(
            scraper, "https://www.kroger.com/account/dashboard"
        )
        assert result is True

    @pytest.mark.asyncio
    async def test_session_redirected_to_signin_returns_false(self, scraper):
        """Without an expiry, a page that ends on the sign-in URL means invalid."""
        result = await self._check_session_landing_on(
            scraper, "https://www.kroger.com/signin?redirectUrl=account"
        )
        assert result is False
|
||||
|
||||
|
||||
class TestLogin:
    """``KrogerScraper.login`` exercised against a fully mocked Playwright stack."""

    @pytest.mark.asyncio
    async def test_login_returns_session_data(self, scraper):
        """Login returns ``SessionData`` carrying the browser context's cookies."""
        # page.locator() hands these out in call order; the names assume the
        # scraper asks for e-mail, password, then submit button (TODO confirm).
        email_field, password_field, submit_button = AsyncMock(), AsyncMock(), AsyncMock()

        page = AsyncMock()
        page.url = "https://www.kroger.com/"
        page.locator = MagicMock(side_effect=[email_field, password_field, submit_button])
        page.wait_for_url = AsyncMock()

        context_cookies = [
            {"name": "kroger_session", "value": "test123", "domain": ".kroger.com", "path": "/"}
        ]
        context = AsyncMock()
        context.new_page = AsyncMock(return_value=page)
        context.cookies = AsyncMock(return_value=context_cookies)
        context.add_cookies = AsyncMock()
        context.add_init_script = AsyncMock()
        browser = AsyncMock()
        browser.new_context = AsyncMock(return_value=context)
        context.browser = browser

        playwright = AsyncMock()
        playwright.chromium.launch = AsyncMock(return_value=browser)

        # Stand-in for the async context manager returned by async_playwright().
        playwright_cm = AsyncMock()
        playwright_cm.__aenter__ = AsyncMock(return_value=playwright)
        playwright_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch("receiptwitness.scrapers.kroger.async_playwright") as async_playwright_stub,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            async_playwright_stub.return_value = playwright_cm
            session = await scraper.login("user@test.com", "password123")

        assert isinstance(session, SessionData)
        assert len(session.cookies) == 1
        assert session.cookies[0]["name"] == "kroger_session"
        assert session.user_agent == DEFAULT_USER_AGENT
        assert session.expires_at is not None
        assert session.extra == {"retailer": "kroger"}
|
||||
|
||||
|
||||
class TestScrapeReceipts:
    """Tests for ``KrogerScraper.scrape_receipts`` against mocked Playwright.

    Each test queues canned responses on ``page.request.get``. The mock names
    used here assume the first response is the order listing and later ones
    are per-receipt detail payloads (TODO confirm against the scraper).
    The page/context/browser scaffolding is identical for every test and is
    built by ``_scrape``.
    """

    @staticmethod
    def _json_response(payload):
        """Build a mocked response with ``ok=True`` whose ``json()`` yields *payload*."""
        response = AsyncMock()
        response.ok = True
        response.json = AsyncMock(return_value=payload)
        return response

    @staticmethod
    async def _scrape(scraper, session, request_get, **scrape_kwargs):
        """Call ``scrape_receipts`` with Playwright and ``human_delay`` mocked.

        Args:
            scraper: The scraper under test.
            session: Session passed through to ``scrape_receipts``.
            request_get: ``AsyncMock`` installed as ``page.request.get``; its
                ``side_effect``/``return_value`` supplies the canned responses.
            **scrape_kwargs: Extra keyword arguments for ``scrape_receipts``
                (for example ``since``).

        Returns:
            The value returned by ``scraper.scrape_receipts``.
        """
        mock_request = AsyncMock()
        mock_request.get = request_get

        mock_page = AsyncMock()
        mock_page.goto = AsyncMock()
        mock_page.request = mock_request

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()
        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with (
            patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            # async_playwright() is used as an async context manager.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm

            return await scraper.scrape_receipts(session, **scrape_kwargs)

    @pytest.mark.asyncio
    async def test_scrape_returns_receipts(self, scraper, valid_session):
        """Two listed orders yield two ``RawReceipt`` objects in listing order."""
        mock_api_response = self._json_response(
            {
                "orders": [
                    {
                        "orderId": "KR-001",
                        "purchaseDate": "2026-03-10T14:00:00Z",
                        "storeNumber": "357",
                    },
                    {
                        "orderId": "KR-002",
                        "purchaseDate": "2026-03-11T10:00:00Z",
                        "storeNumber": "357",
                    },
                ]
            }
        )
        mock_api_response.status = 200
        mock_detail_response = self._json_response({"items": []})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(
                side_effect=[mock_api_response, mock_detail_response, mock_detail_response]
            ),
        )

        assert len(receipts) == 2
        assert receipts[0].receipt_id == "KR-001"
        assert receipts[1].receipt_id == "KR-002"
        assert isinstance(receipts[0], RawReceipt)

    @pytest.mark.asyncio
    async def test_scrape_filters_by_date(self, scraper, valid_session):
        """Orders dated before ``since`` are left out of the result."""
        mock_api_response = self._json_response(
            {
                "orders": [
                    {
                        "orderId": "KR-OLD",
                        "purchaseDate": "2026-01-01T10:00:00Z",
                        "storeNumber": "357",
                    },
                    {
                        "orderId": "KR-NEW",
                        "purchaseDate": "2026-03-15T10:00:00Z",
                        "storeNumber": "357",
                    },
                ]
            }
        )
        mock_detail_response = self._json_response({})
        since = datetime(2026, 3, 1, tzinfo=UTC)

        # Only one detail response is queued: just KR-NEW passes the filter.
        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
            since=since,
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-NEW"

    @pytest.mark.asyncio
    async def test_scrape_handles_api_failure(self, scraper, valid_session):
        """A non-OK (HTTP 500) response produces an empty list."""
        mock_api_response = AsyncMock()
        mock_api_response.ok = False
        mock_api_response.status = 500
        mock_api_response.status_text = "Internal Server Error"

        receipts = await self._scrape(
            scraper, valid_session, AsyncMock(return_value=mock_api_response)
        )
        assert receipts == []

    @pytest.mark.asyncio
    async def test_scrape_handles_unexpected_response(self, scraper, valid_session):
        """A JSON body that is not a dict produces an empty list."""
        mock_api_response = self._json_response("not a dict")

        receipts = await self._scrape(
            scraper, valid_session, AsyncMock(return_value=mock_api_response)
        )
        assert receipts == []

    @pytest.mark.asyncio
    async def test_scrape_alternative_field_names(self, scraper, valid_session):
        """Kroger may use 'purchases' instead of 'orders'."""
        mock_api_response = self._json_response(
            {
                "purchases": [
                    {
                        "receiptId": "KR-ALT-001",
                        "transactionDate": "2026-03-10T14:00:00Z",
                        "divisionNumber": "014",
                    }
                ]
            }
        )
        mock_detail_response = self._json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )

        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-ALT-001"

    @pytest.mark.asyncio
    async def test_scrape_skips_orders_without_id(self, scraper, valid_session):
        """An order entry with no id key is skipped; the valid one is kept."""
        mock_api_response = self._json_response(
            {
                "orders": [
                    {"purchaseDate": "2026-03-10T14:00:00Z"},  # no id
                    {"orderId": "KR-VALID", "purchaseDate": "2026-03-10T14:00:00Z"},
                ]
            }
        )
        mock_detail_response = self._json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )
        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-VALID"

    @pytest.mark.asyncio
    async def test_scrape_skips_orders_with_null_id(self, scraper, valid_session):
        """Ensure orderId: null doesn't produce receipt_id='None' (str(None) bug)."""
        mock_api_response = self._json_response(
            {
                "orders": [
                    {"orderId": None, "receiptId": None, "purchaseDate": "2026-03-10T14:00:00Z"},
                    {"orderId": "KR-REAL", "purchaseDate": "2026-03-10T14:00:00Z"},
                ]
            }
        )
        mock_detail_response = self._json_response({})

        receipts = await self._scrape(
            scraper,
            valid_session,
            AsyncMock(side_effect=[mock_api_response, mock_detail_response]),
        )
        assert len(receipts) == 1
        assert receipts[0].receipt_id == "KR-REAL"
        # Verify no receipt has the string "None" as its ID
        assert all(r.receipt_id != "None" for r in receipts)
|
||||
|
||||
|
||||
class TestParseReceipt:
    """Tests for ``KrogerScraper.parse_receipt`` on hand-built ``RawReceipt`` values."""

    def test_parse_receipt_delegates_to_parser(self, scraper):
        """A receipt with one detail item parses to one item and keeps its id."""
        line_item = {
            "description": "TEST ITEM",
            "basePrice": 5.00,
            "totalPrice": 5.00,
        }
        detail = {"items": [line_item], "total": 5.00}
        raw = RawReceipt(
            receipt_id="KR-001",
            purchase_date="2026-03-12",
            raw_data={"detail": detail},
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "KR-001"
        assert len(parsed["items"]) == 1

    def test_receipt_detail_failure_returns_empty(self, scraper):
        """An empty ``detail`` mapping parses to an empty item list."""
        raw = RawReceipt(
            receipt_id="KR-FAIL",
            purchase_date="2026-03-12",
            raw_data={"total": 10.00, "detail": {}},
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "KR-FAIL"
        assert parsed["items"] == []
|
||||
@@ -0,0 +1,585 @@
|
||||
"""Tests for the Meijer scraper.
|
||||
|
||||
These tests mock Playwright to avoid requiring real Meijer credentials
|
||||
or network access. They verify the scraper's control flow, session handling,
|
||||
date filtering, and error resilience.
|
||||
"""
|
||||
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from receiptwitness.scrapers.base import RawReceipt, SessionData
|
||||
from receiptwitness.scrapers.meijer import (
|
||||
DEFAULT_TIMEZONE,
|
||||
DEFAULT_USER_AGENT,
|
||||
DEFAULT_VIEWPORT,
|
||||
MEIJER_BASE,
|
||||
MEIJER_LOGIN_PAGE,
|
||||
MEIJER_MPERKS_HOME,
|
||||
MEIJER_PURCHASE_HISTORY,
|
||||
MeijerScraper,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def scraper():
    """Provide a new ``MeijerScraper`` built with default arguments."""
    meijer_scraper = MeijerScraper()
    return meijer_scraper
|
||||
|
||||
|
||||
@pytest.fixture
def valid_session():
    """Provide a Meijer session created now that expires four hours from now."""
    session_cookie = {
        "name": "meijer_session",
        "value": "abc123",
        "domain": ".meijer.com",
        "path": "/",
    }
    return SessionData(
        cookies=[session_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=datetime.now(UTC),
        expires_at=datetime.now(UTC) + timedelta(hours=4),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def expired_session():
    """Provide a Meijer session created eight hours ago that expired four hours ago."""
    stale_cookie = {
        "name": "meijer_session",
        "value": "expired",
        "domain": ".meijer.com",
        "path": "/",
    }
    return SessionData(
        cookies=[stale_cookie],
        user_agent=DEFAULT_USER_AGENT,
        created_at=datetime.now(UTC) - timedelta(hours=8),
        expires_at=datetime.now(UTC) - timedelta(hours=4),
    )
|
||||
|
||||
|
||||
class TestMeijerScraperConstants:
    """Pin the module-level constants exported by the Meijer scraper."""

    def test_base_url(self):
        """The base URL is the Meijer site root."""
        expected = "https://www.meijer.com"
        assert MEIJER_BASE == expected

    def test_login_page(self):
        """The login page is the shopping login document."""
        expected = "https://www.meijer.com/shopping/login.html"
        assert MEIJER_LOGIN_PAGE == expected

    def test_mperks_home(self):
        """The mPerks home is ``/mperks.html``."""
        expected = "https://www.meijer.com/mperks.html"
        assert MEIJER_MPERKS_HOME == expected

    def test_purchase_history_url(self):
        """The purchase history URL is the profile purchase-history path."""
        expected = "https://www.meijer.com/bin/meijer/profile/purchasehistory"
        assert MEIJER_PURCHASE_HISTORY == expected

    def test_default_user_agent_is_chrome(self):
        """The default user agent mentions both Chrome and Windows."""
        user_agent = DEFAULT_USER_AGENT
        assert "Chrome" in user_agent
        assert "Windows" in user_agent

    def test_default_viewport_hd(self):
        """The default viewport is 1920x1080."""
        assert DEFAULT_VIEWPORT == dict(width=1920, height=1080)

    def test_default_timezone(self):
        """The default timezone is ``America/Detroit``."""
        expected = "America/Detroit"
        assert DEFAULT_TIMEZONE == expected
|
||||
|
||||
|
||||
class TestCheckSession:
    """Tests for ``MeijerScraper.check_session``.

    The browser-backed tests share one mocked Playwright stack, built by
    ``_check_session_landing_on``; they differ only in the URL the mocked
    page reports and in the expected result.
    """

    @staticmethod
    async def _check_session_landing_on(scraper, landing_url):
        """Run ``check_session`` against a mocked browser and return its result.

        Args:
            scraper: The scraper under test.
            landing_url: Value exposed as ``page.url`` on the mocked page.

        Returns:
            Whatever ``scraper.check_session`` returns for a session that has
            no cookies and no ``expires_at``.
        """
        session = SessionData(
            cookies=[],
            user_agent=DEFAULT_USER_AGENT,
            created_at=datetime.now(UTC),
            expires_at=None,
        )

        mock_page = AsyncMock()
        mock_page.url = landing_url
        mock_response = MagicMock()
        mock_response.ok = True
        mock_page.goto = AsyncMock(return_value=mock_response)

        mock_context = AsyncMock()
        mock_context.new_page = AsyncMock(return_value=mock_page)
        mock_context.add_cookies = AsyncMock()
        mock_context.add_init_script = AsyncMock()
        mock_browser = AsyncMock()
        mock_browser.new_context = AsyncMock(return_value=mock_context)
        mock_context.browser = mock_browser

        mock_pw = AsyncMock()
        mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)

        with patch("receiptwitness.scrapers.meijer.async_playwright") as mock_apw:
            # async_playwright() is used as an async context manager.
            mock_cm = AsyncMock()
            mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
            mock_cm.__aexit__ = AsyncMock(return_value=False)
            mock_apw.return_value = mock_cm

            return await scraper.check_session(session)

    @pytest.mark.asyncio
    async def test_expired_session_returns_false(self, scraper, expired_session):
        """A session whose ``expires_at`` is in the past is reported invalid."""
        # No Playwright patch is installed here, unlike the tests below.
        result = await scraper.check_session(expired_session)
        assert result is False

    @pytest.mark.asyncio
    async def test_no_expiry_checks_via_browser(self, scraper):
        """Without an expiry, a page that stays on the mPerks URL means valid."""
        result = await self._check_session_landing_on(
            scraper, "https://www.meijer.com/mperks.html"
        )
        assert result is True

    @pytest.mark.asyncio
    async def test_session_redirected_to_login_returns_false(self, scraper):
        """Without an expiry, a page that ends on the login URL means invalid."""
        result = await self._check_session_landing_on(
            scraper, "https://www.meijer.com/shopping/login.html?redirect=mperks"
        )
        assert result is False
|
||||
|
||||
|
||||
class TestLogin:
    """``MeijerScraper.login`` exercised against a fully mocked Playwright stack."""

    @pytest.mark.asyncio
    async def test_login_returns_session_data(self, scraper):
        """Login returns ``SessionData`` with the context's cookies and a 3h+ lifetime."""
        # page.locator() hands these out in call order; the names assume the
        # scraper asks for e-mail, password, then submit button (TODO confirm).
        email_field, password_field, submit_button = AsyncMock(), AsyncMock(), AsyncMock()

        page = AsyncMock()
        page.url = "https://www.meijer.com/mperks.html"
        page.locator = MagicMock(side_effect=[email_field, password_field, submit_button])
        page.wait_for_url = AsyncMock()

        context_cookies = [
            {"name": "meijer_session", "value": "test456", "domain": ".meijer.com", "path": "/"}
        ]
        context = AsyncMock()
        context.new_page = AsyncMock(return_value=page)
        context.cookies = AsyncMock(return_value=context_cookies)
        context.add_cookies = AsyncMock()
        context.add_init_script = AsyncMock()
        browser = AsyncMock()
        browser.new_context = AsyncMock(return_value=context)
        context.browser = browser

        playwright = AsyncMock()
        playwright.chromium.launch = AsyncMock(return_value=browser)

        # Stand-in for the async context manager returned by async_playwright().
        playwright_cm = AsyncMock()
        playwright_cm.__aenter__ = AsyncMock(return_value=playwright)
        playwright_cm.__aexit__ = AsyncMock(return_value=False)

        with (
            patch("receiptwitness.scrapers.meijer.async_playwright") as async_playwright_stub,
            patch.object(scraper, "human_delay", new_callable=AsyncMock),
        ):
            async_playwright_stub.return_value = playwright_cm
            session = await scraper.login("user@test.com", "password123")

        assert isinstance(session, SessionData)
        assert len(session.cookies) == 1
        assert session.cookies[0]["name"] == "meijer_session"
        assert session.user_agent == DEFAULT_USER_AGENT
        assert session.expires_at is not None
        # Meijer sessions last 4 hours
        earliest_expiry = session.created_at + timedelta(hours=3)
        assert session.expires_at > earliest_expiry
|
||||
|
||||
|
||||
class TestScrapeReceipts:
|
||||
@pytest.mark.asyncio
async def test_scrape_returns_receipts(self, scraper, valid_session):
    """Two listed transactions yield two ``RawReceipt`` objects in listing order."""
    transactions = [
        {
            "transactionId": "TXN-001",
            "transactionDate": "2026-03-10T14:00:00Z",
            "storeNumber": "42",
        },
        {
            "transactionId": "TXN-002",
            "transactionDate": "2026-03-11T10:00:00Z",
            "storeNumber": "42",
        },
    ]
    listing = AsyncMock()
    listing.ok = True
    listing.status = 200
    listing.json = AsyncMock(return_value={"transactions": transactions})

    detail = AsyncMock()
    detail.ok = True
    detail.json = AsyncMock(return_value={"items": []})

    # Responses are consumed in order: the listing, then one detail per receipt.
    request = AsyncMock()
    request.get = AsyncMock(side_effect=[listing, detail, detail])

    page = AsyncMock()
    page.goto = AsyncMock()
    page.request = request

    context = AsyncMock()
    context.new_page = AsyncMock(return_value=page)
    context.add_cookies = AsyncMock()
    context.add_init_script = AsyncMock()
    browser = AsyncMock()
    browser.new_context = AsyncMock(return_value=context)
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager returned by async_playwright().
    playwright_cm = AsyncMock()
    playwright_cm.__aenter__ = AsyncMock(return_value=playwright)
    playwright_cm.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as async_playwright_stub,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        async_playwright_stub.return_value = playwright_cm
        receipts = await scraper.scrape_receipts(valid_session)

    assert len(receipts) == 2
    assert receipts[0].receipt_id == "TXN-001"
    assert receipts[1].receipt_id == "TXN-002"
    assert isinstance(receipts[0], RawReceipt)
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_filters_by_date(self, scraper, valid_session):
    """A transaction dated before ``since`` is left out of the scrape result."""
    cutoff = datetime(2026, 3, 1, tzinfo=UTC)
    listing = {
        "transactions": [
            {
                "transactionId": "TXN-OLD",
                "transactionDate": "2026-01-01T10:00:00Z",
                "storeNumber": "42",
            },
            {
                "transactionId": "TXN-NEW",
                "transactionDate": "2026-03-15T10:00:00Z",
                "storeNumber": "42",
            },
        ]
    }
    history_resp = AsyncMock(ok=True, json=AsyncMock(return_value=listing))
    detail_resp = AsyncMock(ok=True, json=AsyncMock(return_value={}))

    # Exactly one listing response and one detail response are queued.
    http = AsyncMock(get=AsyncMock(side_effect=[history_resp, detail_resp]))
    page = AsyncMock(goto=AsyncMock(), request=http)

    context = AsyncMock(
        new_page=AsyncMock(return_value=page),
        add_cookies=AsyncMock(),
        add_init_script=AsyncMock(),
    )
    browser = AsyncMock(new_context=AsyncMock(return_value=context))
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager that async_playwright() returns.
    pw_manager = AsyncMock()
    pw_manager.__aenter__ = AsyncMock(return_value=playwright)
    pw_manager.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as fake_apw,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        fake_apw.return_value = pw_manager
        receipts = await scraper.scrape_receipts(valid_session, since=cutoff)

    assert len(receipts) == 1
    assert receipts[0].receipt_id == "TXN-NEW"
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_handles_api_failure(self, scraper, valid_session):
    """A non-OK (HTTP 500) listing response yields an empty receipt list."""
    failed_resp = AsyncMock(
        ok=False,
        status=500,
        status_text="Internal Server Error",
    )

    # Every request.get call hands back the same failed response.
    http = AsyncMock(get=AsyncMock(return_value=failed_resp))
    page = AsyncMock(goto=AsyncMock(), request=http)

    context = AsyncMock(
        new_page=AsyncMock(return_value=page),
        add_cookies=AsyncMock(),
        add_init_script=AsyncMock(),
    )
    browser = AsyncMock(new_context=AsyncMock(return_value=context))
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager that async_playwright() returns.
    pw_manager = AsyncMock()
    pw_manager.__aenter__ = AsyncMock(return_value=playwright)
    pw_manager.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as fake_apw,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        fake_apw.return_value = pw_manager
        receipts = await scraper.scrape_receipts(valid_session)

    assert receipts == []
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_handles_unexpected_response(self, scraper, valid_session):
    """An OK response whose JSON body is a bare string yields no receipts."""
    odd_resp = AsyncMock(ok=True, json=AsyncMock(return_value="not a dict"))

    # Every request.get call hands back the same malformed response.
    http = AsyncMock(get=AsyncMock(return_value=odd_resp))
    page = AsyncMock(goto=AsyncMock(), request=http)

    context = AsyncMock(
        new_page=AsyncMock(return_value=page),
        add_cookies=AsyncMock(),
        add_init_script=AsyncMock(),
    )
    browser = AsyncMock(new_context=AsyncMock(return_value=context))
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager that async_playwright() returns.
    pw_manager = AsyncMock()
    pw_manager.__aenter__ = AsyncMock(return_value=playwright)
    pw_manager.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as fake_apw,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        fake_apw.return_value = pw_manager
        receipts = await scraper.scrape_receipts(valid_session)

    assert receipts == []
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_alternative_field_names(self, scraper, valid_session):
    """Scrape a listing keyed by 'purchaseHistory' rather than 'transactions'.

    The single entry uses the 'receiptId' / 'purchaseDate' / 'storeId' keys,
    and its 'receiptId' value must surface as the receipt id.
    """
    listing = {
        "purchaseHistory": [
            {
                "receiptId": "MJ-ALT-001",
                "purchaseDate": "2026-03-10T14:00:00Z",
                "storeId": "99",
            }
        ]
    }
    history_resp = AsyncMock(ok=True, json=AsyncMock(return_value=listing))
    detail_resp = AsyncMock(ok=True, json=AsyncMock(return_value={}))

    # Exactly one listing response and one detail response are queued.
    http = AsyncMock(get=AsyncMock(side_effect=[history_resp, detail_resp]))
    page = AsyncMock(goto=AsyncMock(), request=http)

    context = AsyncMock(
        new_page=AsyncMock(return_value=page),
        add_cookies=AsyncMock(),
        add_init_script=AsyncMock(),
    )
    browser = AsyncMock(new_context=AsyncMock(return_value=context))
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager that async_playwright() returns.
    pw_manager = AsyncMock()
    pw_manager.__aenter__ = AsyncMock(return_value=playwright)
    pw_manager.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as fake_apw,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        fake_apw.return_value = pw_manager
        receipts = await scraper.scrape_receipts(valid_session)

    assert len(receipts) == 1
    assert receipts[0].receipt_id == "MJ-ALT-001"
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_skips_transactions_without_id(self, scraper, valid_session):
    """A listing entry that carries no id is dropped; the one with an id is kept."""
    listing = {
        "transactions": [
            {"transactionDate": "2026-03-10T14:00:00Z"},  # no id
            {"transactionId": "TXN-VALID", "transactionDate": "2026-03-10T14:00:00Z"},
        ]
    }
    history_resp = AsyncMock(ok=True, json=AsyncMock(return_value=listing))
    detail_resp = AsyncMock(ok=True, json=AsyncMock(return_value={}))

    # Exactly one listing response and one detail response are queued.
    http = AsyncMock(get=AsyncMock(side_effect=[history_resp, detail_resp]))
    page = AsyncMock(goto=AsyncMock(), request=http)

    context = AsyncMock(
        new_page=AsyncMock(return_value=page),
        add_cookies=AsyncMock(),
        add_init_script=AsyncMock(),
    )
    browser = AsyncMock(new_context=AsyncMock(return_value=context))
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager that async_playwright() returns.
    pw_manager = AsyncMock()
    pw_manager.__aenter__ = AsyncMock(return_value=playwright)
    pw_manager.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as fake_apw,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        fake_apw.return_value = pw_manager
        receipts = await scraper.scrape_receipts(valid_session)

    assert len(receipts) == 1
    assert receipts[0].receipt_id == "TXN-VALID"
|
||||
|
||||
@pytest.mark.asyncio
async def test_scrape_receipt_detail_failure_returns_empty_detail(self, scraper, valid_session):
    """A failed (HTTP 404) detail lookup must not crash the scraper.

    The receipt is still returned, with an empty dict under ``raw_data["detail"]``.
    """
    listing = {
        "transactions": [
            {
                "transactionId": "TXN-DETAIL-FAIL",
                "transactionDate": "2026-03-10T14:00:00Z",
                "storeNumber": "42",
            }
        ]
    }
    history_resp = AsyncMock(ok=True, json=AsyncMock(return_value=listing))
    detail_resp = AsyncMock(ok=False, status=404)

    # The listing call succeeds; the queued detail response is the failing one.
    http = AsyncMock(get=AsyncMock(side_effect=[history_resp, detail_resp]))
    page = AsyncMock(goto=AsyncMock(), request=http)

    context = AsyncMock(
        new_page=AsyncMock(return_value=page),
        add_cookies=AsyncMock(),
        add_init_script=AsyncMock(),
    )
    browser = AsyncMock(new_context=AsyncMock(return_value=context))
    context.browser = browser

    playwright = AsyncMock()
    playwright.chromium.launch = AsyncMock(return_value=browser)

    # Stand-in for the async context manager that async_playwright() returns.
    pw_manager = AsyncMock()
    pw_manager.__aenter__ = AsyncMock(return_value=playwright)
    pw_manager.__aexit__ = AsyncMock(return_value=False)

    with (
        patch("receiptwitness.scrapers.meijer.async_playwright") as fake_apw,
        patch.object(scraper, "human_delay", new_callable=AsyncMock),
    ):
        fake_apw.return_value = pw_manager
        receipts = await scraper.scrape_receipts(valid_session)

    assert len(receipts) == 1
    assert receipts[0].receipt_id == "TXN-DETAIL-FAIL"
    assert receipts[0].raw_data.get("detail") == {}
|
||||
|
||||
|
||||
class TestParseReceipt:
    """Checks on ``parse_receipt`` as exposed by the ``scraper`` fixture."""

    def test_parse_receipt_delegates_to_parser(self, scraper):
        """A receipt whose detail lists one item parses to exactly one item."""
        line_item = {
            "description": "TEST ITEM",
            "price": 5.00,
            "extendedPrice": 5.00,
        }
        detail = {"items": [line_item], "total": 5.00}
        raw = RawReceipt(
            receipt_id="TXN-001",
            purchase_date="2026-03-10",
            raw_data={"detail": detail},
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "TXN-001"
        assert len(parsed["items"]) == 1

    def test_receipt_detail_failure_returns_empty(self, scraper):
        """An empty ``detail`` dict parses to an empty item list."""
        payload = {"total": 10.00, "detail": {}}
        raw = RawReceipt(
            receipt_id="TXN-FAIL",
            purchase_date="2026-03-10",
            raw_data=payload,
        )

        parsed = scraper.parse_receipt(raw)

        assert parsed["receipt_id"] == "TXN-FAIL"
        assert parsed["items"] == []
|
||||
Reference in New Issue
Block a user