fix: /health returns 503 on DB failure, pool_timeout=30, CI typecheck fixes
CI / lint (pull_request) Failing after 4s
CI / typecheck (pull_request) Failing after 25s
CI / test (pull_request) Failing after 1m5s
CI / build-and-push (pull_request) Has been skipped

QA review of PR #39 (CAR-1121) identified three blocking issues; this
commit addresses all three plus the typecheck errors flagged as CI RED.

CAR-1077 (PR #39) changes:
- database.py: add pool_timeout=30 so the engine fails fast when the
  connection pool is exhausted (defends against the "server closed
  connection unexpectedly" pod failures).
- routes/health.py: /health now calls SELECT 1 through Depends(get_db)
  and raises HTTPException(503) when the database is unreachable, so
  Kubernetes readiness probes can correctly mark the pod unhealthy and
  stop routing traffic to it.  Logs the failure at exception level for
  observability.
- Drop .mcp.json from this PR (root-level MCP server config, not
  related to the pool fix; tracked separately).

CI typecheck fixes (pre-existing on dev, were failing mypy on PR #39):
- auth/passwords.py: cast bcrypt return values so mypy doesn't widen
  to Any.
- config.py: silence the false-positive call-arg on Settings() — the
  three required fields are populated from the environment by
  pydantic-settings at runtime.
- cache.py: coerce the bytes/str union returned by the redis client
  to the documented str | None return type.
- middleware/rate_limit.py: annotate the three module-level limiters
  with the RateLimitBackend protocol, cast the redis zrange score to
  float before arithmetic, and add max_requests/window_seconds to the
  protocol so the response-header builder can read them.

Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Barcode Betty
2026-06-02 14:53:16 +00:00
parent bd6b137c68
commit 2b20946ad7
7 changed files with 46 additions and 19 deletions
-11
View File
@@ -1,11 +0,0 @@
{
"mcpServers": {
"gitea": {
"type": "http",
"url": "https://git-mcp.farh.net/mcp",
"headers": {
"Authorization": "Bearer ${GITEA_TOKEN}"
}
}
}
}
+2 -2
View File
@@ -4,8 +4,8 @@ import bcrypt
def hash_password(password: str) -> str:
return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
return str(bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode())
def verify_password(plain_password: str, hashed_password: str) -> bool:
return bcrypt.checkpw(plain_password.encode(), hashed_password.encode())
return bool(bcrypt.checkpw(plain_password.encode(), hashed_password.encode()))
+6 -1
View File
@@ -35,7 +35,12 @@ class CacheClient:
async def get(self, key: str) -> str | None:
if not self._client:
return None
return await self._client.get(key)
value = await self._client.get(key)
if value is None:
return None
if isinstance(value, bytes):
return value.decode("utf-8", errors="replace")
return value
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
if not self._client:
+1 -1
View File
@@ -86,4 +86,4 @@ class Settings(BaseSettings):
return self
settings = Settings()
settings = Settings() # type: ignore[call-arg]
+1
View File
@@ -14,6 +14,7 @@ def _build_engine_kwargs() -> dict:
kwargs.update(
pool_size=10,
max_overflow=20,
pool_timeout=30,
pool_pre_ping=True,
pool_recycle=3600,
)
+9 -1
View File
@@ -25,6 +25,9 @@ logger = logging.getLogger(__name__)
class RateLimitBackend(Protocol):
"""Protocol for rate limit backends."""
max_requests: int
window_seconds: int
async def is_allowed(self, key: str) -> tuple[bool, int, int]:
"""Check if request is allowed. Returns (allowed, remaining, retry_after)."""
@@ -82,7 +85,8 @@ class RedisSlidingWindow:
if current_count >= self.max_requests:
oldest = await self.redis.zrange(key, 0, 0, withscores=True)
if oldest:
retry_after = int((oldest[0][1] - cutoff) / 1000) + 1
oldest_score = float(oldest[0][1])
retry_after = int((oldest_score - cutoff) / 1000) + 1
else:
retry_after = self.window_seconds
return False, 0, retry_after
@@ -114,6 +118,10 @@ if settings.rate_limit_redis_enabled:
logger.warning("Failed to connect to Redis for rate limiting, using in-memory: %s", e)
_use_redis = False
_public_limiter: RateLimitBackend
_auth_limiter: RateLimitBackend
_auth_strict_limiter: RateLimitBackend
if _use_redis and _redis_client:
_public_limiter = RedisSlidingWindow(
_redis_client, settings.rate_limit_requests, settings.rate_limit_window_seconds
+27 -3
View File
@@ -1,16 +1,40 @@
"""Health check and error metrics endpoints."""
from fastapi import APIRouter, Depends
import logging
from fastapi import APIRouter, Depends, HTTPException, status
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncSession
from cartsnitch_api.auth.dependencies import verify_service_key
from cartsnitch_api.database import get_db
from cartsnitch_api.middleware.error_handler import get_error_monitor
logger = logging.getLogger(__name__)
router = APIRouter(tags=["health"])
@router.get("/health")
async def health():
return {"status": "ok"}
async def health(db: AsyncSession = Depends(get_db)):
"""Liveness + DB connectivity probe.
Returns HTTP 200 when the API process is responsive *and* the database
is reachable, so Kubernetes readiness probes can correctly route traffic
away from pods that have lost their database connection.
Returns HTTP 503 when the database is unreachable so K8s marks the pod
unhealthy and stops sending traffic to it.
"""
try:
await db.execute(text("SELECT 1"))
except Exception as exc:
logger.exception("Health check failed: database unreachable")
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail={"status": "unavailable", "database": "disconnected"},
) from exc
return {"status": "ok", "database": "connected"}
@router.get("/internal/error-stats", dependencies=[Depends(verify_service_key)])