Files
cartsnitch-ceo[bot] bf7cabc9d8 release: fix HIGH-severity CVEs in receiptwitness image (UAT+Security PASS)
release: fix HIGH-severity CVEs in receiptwitness image (UAT+Security PASS)
2026-04-19 02:40:14 +00:00

575 lines
21 KiB
Python

"""Tests for the Kroger scraper.
These tests mock Playwright to avoid requiring real Kroger credentials
or network access. They verify the scraper's control flow, session handling,
date filtering, and error resilience.
"""
from datetime import UTC, datetime, timedelta
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from receiptwitness.scrapers.base import RawReceipt, SessionData
from receiptwitness.scrapers.kroger import (
DEFAULT_TIMEZONE,
DEFAULT_USER_AGENT,
DEFAULT_VIEWPORT,
KROGER_BASE,
KROGER_LOGIN_PAGE,
KROGER_PURCHASE_HISTORY,
KrogerScraper,
)
@pytest.fixture
def scraper():
return KrogerScraper()
@pytest.fixture
def valid_session():
return SessionData(
cookies=[{"name": "session", "value": "abc123", "domain": ".kroger.com", "path": "/"}],
user_agent=DEFAULT_USER_AGENT,
created_at=datetime.now(UTC),
expires_at=datetime.now(UTC) + timedelta(hours=2),
extra={"retailer": "kroger"},
)
@pytest.fixture
def expired_session():
return SessionData(
cookies=[{"name": "session", "value": "expired", "domain": ".kroger.com", "path": "/"}],
user_agent=DEFAULT_USER_AGENT,
created_at=datetime.now(UTC) - timedelta(hours=4),
expires_at=datetime.now(UTC) - timedelta(hours=2),
)
class TestKrogerScraperConstants:
def test_base_url(self):
assert KROGER_BASE == "https://www.kroger.com"
def test_login_page(self):
assert KROGER_LOGIN_PAGE == "https://www.kroger.com/signin"
def test_purchase_history_page(self):
assert KROGER_PURCHASE_HISTORY == "https://www.kroger.com/mypurchases"
def test_default_user_agent_is_chrome(self):
assert "Chrome" in DEFAULT_USER_AGENT
assert "Windows" in DEFAULT_USER_AGENT
def test_default_viewport_hd(self):
assert DEFAULT_VIEWPORT == {"width": 1920, "height": 1080}
def test_default_timezone(self):
assert DEFAULT_TIMEZONE == "America/New_York"
class TestCheckSession:
@pytest.mark.asyncio
async def test_expired_session_returns_false(self, scraper, expired_session):
result = await scraper.check_session(expired_session)
assert result is False
@pytest.mark.asyncio
async def test_no_expiry_checks_via_browser(self, scraper):
session = SessionData(
cookies=[],
user_agent=DEFAULT_USER_AGENT,
created_at=datetime.now(UTC),
expires_at=None,
)
mock_page = AsyncMock()
mock_page.url = "https://www.kroger.com/account/dashboard"
mock_response = MagicMock()
mock_response.ok = True
mock_page.goto = AsyncMock(return_value=mock_response)
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw:
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
result = await scraper.check_session(session)
assert result is True
@pytest.mark.asyncio
async def test_session_redirected_to_signin_returns_false(self, scraper):
session = SessionData(
cookies=[],
user_agent=DEFAULT_USER_AGENT,
created_at=datetime.now(UTC),
expires_at=None,
)
mock_page = AsyncMock()
mock_page.url = "https://www.kroger.com/signin?redirectUrl=account"
mock_response = MagicMock()
mock_response.ok = True
mock_page.goto = AsyncMock(return_value=mock_response)
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw:
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
result = await scraper.check_session(session)
assert result is False
class TestLogin:
@pytest.mark.asyncio
async def test_login_returns_session_data(self, scraper):
mock_page = AsyncMock()
mock_page.url = "https://www.kroger.com/"
# Mock locator chain
mock_email = AsyncMock()
mock_password = AsyncMock()
mock_button = AsyncMock()
mock_page.locator = MagicMock(side_effect=[mock_email, mock_password, mock_button])
mock_page.wait_for_url = AsyncMock()
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.cookies = AsyncMock(
return_value=[
{"name": "kroger_session", "value": "test123", "domain": ".kroger.com", "path": "/"}
]
)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
session = await scraper.login("user@test.com", "password123")
assert isinstance(session, SessionData)
assert len(session.cookies) == 1
assert session.cookies[0]["name"] == "kroger_session"
assert session.user_agent == DEFAULT_USER_AGENT
assert session.expires_at is not None
assert session.extra == {"retailer": "kroger"}
class TestScrapeReceipts:
@pytest.mark.asyncio
async def test_scrape_returns_receipts(self, scraper, valid_session):
mock_api_response = AsyncMock()
mock_api_response.ok = True
mock_api_response.status = 200
mock_api_response.json = AsyncMock(
return_value={
"orders": [
{
"orderId": "KR-001",
"purchaseDate": "2026-03-10T14:00:00Z",
"storeNumber": "357",
},
{
"orderId": "KR-002",
"purchaseDate": "2026-03-11T10:00:00Z",
"storeNumber": "357",
},
]
}
)
mock_detail_response = AsyncMock()
mock_detail_response.ok = True
mock_detail_response.json = AsyncMock(return_value={"items": []})
mock_request = AsyncMock()
mock_request.get = AsyncMock(
side_effect=[mock_api_response, mock_detail_response, mock_detail_response]
)
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session)
assert len(receipts) == 2
assert receipts[0].receipt_id == "KR-001"
assert receipts[1].receipt_id == "KR-002"
assert isinstance(receipts[0], RawReceipt)
@pytest.mark.asyncio
async def test_scrape_filters_by_date(self, scraper, valid_session):
mock_api_response = AsyncMock()
mock_api_response.ok = True
mock_api_response.json = AsyncMock(
return_value={
"orders": [
{
"orderId": "KR-OLD",
"purchaseDate": "2026-01-01T10:00:00Z",
"storeNumber": "357",
},
{
"orderId": "KR-NEW",
"purchaseDate": "2026-03-15T10:00:00Z",
"storeNumber": "357",
},
]
}
)
mock_detail_response = AsyncMock()
mock_detail_response.ok = True
mock_detail_response.json = AsyncMock(return_value={})
mock_request = AsyncMock()
mock_request.get = AsyncMock(side_effect=[mock_api_response, mock_detail_response])
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
since = datetime(2026, 3, 1, tzinfo=UTC)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session, since=since)
assert len(receipts) == 1
assert receipts[0].receipt_id == "KR-NEW"
@pytest.mark.asyncio
async def test_scrape_handles_api_failure(self, scraper, valid_session):
mock_api_response = AsyncMock()
mock_api_response.ok = False
mock_api_response.status = 500
mock_api_response.status_text = "Internal Server Error"
mock_request = AsyncMock()
mock_request.get = AsyncMock(return_value=mock_api_response)
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session)
assert receipts == []
@pytest.mark.asyncio
async def test_scrape_handles_unexpected_response(self, scraper, valid_session):
mock_api_response = AsyncMock()
mock_api_response.ok = True
mock_api_response.json = AsyncMock(return_value="not a dict")
mock_request = AsyncMock()
mock_request.get = AsyncMock(return_value=mock_api_response)
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session)
assert receipts == []
@pytest.mark.asyncio
async def test_scrape_alternative_field_names(self, scraper, valid_session):
"""Kroger may use 'purchases' instead of 'orders'."""
mock_api_response = AsyncMock()
mock_api_response.ok = True
mock_api_response.json = AsyncMock(
return_value={
"purchases": [
{
"receiptId": "KR-ALT-001",
"transactionDate": "2026-03-10T14:00:00Z",
"divisionNumber": "014",
}
]
}
)
mock_detail_response = AsyncMock()
mock_detail_response.ok = True
mock_detail_response.json = AsyncMock(return_value={})
mock_request = AsyncMock()
mock_request.get = AsyncMock(side_effect=[mock_api_response, mock_detail_response])
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session)
assert len(receipts) == 1
assert receipts[0].receipt_id == "KR-ALT-001"
@pytest.mark.asyncio
async def test_scrape_skips_orders_without_id(self, scraper, valid_session):
mock_api_response = AsyncMock()
mock_api_response.ok = True
mock_api_response.json = AsyncMock(
return_value={
"orders": [
{"purchaseDate": "2026-03-10T14:00:00Z"}, # no id
{"orderId": "KR-VALID", "purchaseDate": "2026-03-10T14:00:00Z"},
]
}
)
mock_detail_response = AsyncMock()
mock_detail_response.ok = True
mock_detail_response.json = AsyncMock(return_value={})
mock_request = AsyncMock()
mock_request.get = AsyncMock(side_effect=[mock_api_response, mock_detail_response])
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session)
assert len(receipts) == 1
assert receipts[0].receipt_id == "KR-VALID"
@pytest.mark.asyncio
async def test_scrape_skips_orders_with_null_id(self, scraper, valid_session):
"""Ensure orderId: null doesn't produce receipt_id='None' (str(None) bug)."""
mock_api_response = AsyncMock()
mock_api_response.ok = True
mock_api_response.json = AsyncMock(
return_value={
"orders": [
{"orderId": None, "receiptId": None, "purchaseDate": "2026-03-10T14:00:00Z"},
{"orderId": "KR-REAL", "purchaseDate": "2026-03-10T14:00:00Z"},
]
}
)
mock_detail_response = AsyncMock()
mock_detail_response.ok = True
mock_detail_response.json = AsyncMock(return_value={})
mock_request = AsyncMock()
mock_request.get = AsyncMock(side_effect=[mock_api_response, mock_detail_response])
mock_page = AsyncMock()
mock_page.goto = AsyncMock()
mock_page.request = mock_request
mock_context = AsyncMock()
mock_context.new_page = AsyncMock(return_value=mock_page)
mock_context.add_cookies = AsyncMock()
mock_context.add_init_script = AsyncMock()
mock_browser = AsyncMock()
mock_browser.new_context = AsyncMock(return_value=mock_context)
mock_context.browser = mock_browser
mock_pw = AsyncMock()
mock_pw.chromium.launch = AsyncMock(return_value=mock_browser)
with (
patch("receiptwitness.scrapers.kroger.async_playwright") as mock_apw,
patch.object(scraper, "human_delay", new_callable=AsyncMock),
):
mock_cm = AsyncMock()
mock_cm.__aenter__ = AsyncMock(return_value=mock_pw)
mock_cm.__aexit__ = AsyncMock(return_value=False)
mock_apw.return_value = mock_cm
receipts = await scraper.scrape_receipts(valid_session)
assert len(receipts) == 1
assert receipts[0].receipt_id == "KR-REAL"
# Verify no receipt has the string "None" as its ID
assert all(r.receipt_id != "None" for r in receipts)
class TestParseReceipt:
def test_parse_receipt_delegates_to_parser(self, scraper):
raw = RawReceipt(
receipt_id="KR-001",
purchase_date="2026-03-12",
raw_data={
"detail": {
"items": [
{
"description": "TEST ITEM",
"basePrice": 5.00,
"totalPrice": 5.00,
}
],
"total": 5.00,
}
},
)
result = scraper.parse_receipt(raw)
assert result["receipt_id"] == "KR-001"
assert len(result["items"]) == 1
def test_receipt_detail_failure_returns_empty(self, scraper):
"""Verify receipt detail failures produce empty detail."""
raw = RawReceipt(
receipt_id="KR-FAIL",
purchase_date="2026-03-12",
raw_data={"total": 10.00, "detail": {}},
)
result = scraper.parse_receipt(raw)
assert result["receipt_id"] == "KR-FAIL"
assert result["items"] == []