2b20946ad7
QA review of PR #39 (CAR-1121) identified three blocking issues; this commit addresses all three plus the typecheck errors flagged as CI RED. CAR-1077 (PR #39) changes: - database.py: add pool_timeout=30 so the engine fails fast when the connection pool is exhausted (defends against the "server closed connection unexpectedly" pod failures). - routes/health.py: /health now calls SELECT 1 through Depends(get_db) and raises HTTPException(503) when the database is unreachable, so Kubernetes readiness probes can correctly mark the pod unhealthy and stop routing traffic to it. Logs the failure at exception level for observability. - Drop .mcp.json from this PR (root-level MCP server config, not related to the pool fix; tracked separately). CI typecheck fixes (pre-existing on dev, were failing mypy on PR #39): - auth/passwords.py: cast bcrypt return values so mypy doesn't widen to Any. - config.py: silence the false-positive call-arg on Settings() — the three required fields are populated from the environment by pydantic-settings at runtime. - cache.py: coerce the bytes/str union returned by the redis client to the documented str | None return type. - middleware/rate_limit.py: annotate the three module-level limiters with the RateLimitBackend protocol, cast the redis zrange score to float before arithmetic, and add max_requests/window_seconds to the protocol so the response-header builder can read them. Co-Authored-By: Paperclip <noreply@paperclip.ing>
45 lines
1.5 KiB
Python
45 lines
1.5 KiB
Python
"""Health check and error metrics endpoints."""
|
|
|
|
import logging
|
|
|
|
from fastapi import APIRouter, Depends, HTTPException, status
|
|
from sqlalchemy import text
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
|
|
from cartsnitch_api.auth.dependencies import verify_service_key
|
|
from cartsnitch_api.database import get_db
|
|
from cartsnitch_api.middleware.error_handler import get_error_monitor
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(tags=["health"])
|
|
|
|
|
|
@router.get("/health")
async def health(db: AsyncSession = Depends(get_db)):
    """Report whether the API process can reach its database.

    Runs a trivial ``SELECT 1`` through the injected session.

    Returns:
        ``{"status": "ok", "database": "connected"}`` (HTTP 200) when the
        query succeeds.

    Raises:
        HTTPException: HTTP 503 with a ``{"status": "unavailable",
            "database": "disconnected"}`` detail when the query raises,
            so an orchestrator probing this endpoint (e.g. a Kubernetes
            readiness probe) can stop routing traffic to this pod.
    """
    try:
        await db.execute(text("SELECT 1"))
    except Exception as db_error:
        # Any failure while running the probe query is reported as
        # "database unreachable"; the traceback is logged for observability.
        logger.exception("Health check failed: database unreachable")
        failure_detail = {"status": "unavailable", "database": "disconnected"}
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail=failure_detail,
        ) from db_error
    else:
        return {"status": "ok", "database": "connected"}
|
|
|
|
|
|
@router.get("/internal/error-stats", dependencies=[Depends(verify_service_key)])
async def error_stats():
    """Return the error monitor's current statistics.

    Internal-only endpoint: the route is guarded by the
    ``verify_service_key`` dependency (requires X-Service-Key).
    """
    return get_error_monitor().get_stats()
|