fix: /health returns 503 on DB failure, pool_timeout=30, CI typecheck fixes
QA review of PR #39 (CAR-1121) identified three blocking issues; this commit addresses all three plus the typecheck errors flagged as CI RED. CAR-1077 (PR #39) changes: - database.py: add pool_timeout=30 so the engine fails fast when the connection pool is exhausted (defends against the "server closed connection unexpectedly" pod failures). - routes/health.py: /health now calls SELECT 1 through Depends(get_db) and raises HTTPException(503) when the database is unreachable, so Kubernetes readiness probes can correctly mark the pod unhealthy and stop routing traffic to it. Logs the failure at exception level for observability. - Drop .mcp.json from this PR (root-level MCP server config, not related to the pool fix; tracked separately). CI typecheck fixes (pre-existing on dev, were failing mypy on PR #39): - auth/passwords.py: cast bcrypt return values so mypy doesn't widen to Any. - config.py: silence the false-positive call-arg on Settings() — the three required fields are populated from the environment by pydantic-settings at runtime. - cache.py: coerce the bytes/str union returned by the redis client to the documented str | None return type. - middleware/rate_limit.py: annotate the three module-level limiters with the RateLimitBackend protocol, cast the redis zrange score to float before arithmetic, and add max_requests/window_seconds to the protocol so the response-header builder can read them. Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
@@ -1,11 +0,0 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"gitea": {
|
||||
"type": "http",
|
||||
"url": "https://git-mcp.farh.net/mcp",
|
||||
"headers": {
|
||||
"Authorization": "Bearer ${GITEA_TOKEN}"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,8 +4,8 @@ import bcrypt
|
||||
|
||||
|
||||
def hash_password(password: str) -> str:
|
||||
return bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode()
|
||||
return str(bcrypt.hashpw(password.encode(), bcrypt.gensalt()).decode())
|
||||
|
||||
|
||||
def verify_password(plain_password: str, hashed_password: str) -> bool:
|
||||
return bcrypt.checkpw(plain_password.encode(), hashed_password.encode())
|
||||
return bool(bcrypt.checkpw(plain_password.encode(), hashed_password.encode()))
|
||||
|
||||
@@ -35,7 +35,12 @@ class CacheClient:
|
||||
async def get(self, key: str) -> str | None:
|
||||
if not self._client:
|
||||
return None
|
||||
return await self._client.get(key)
|
||||
value = await self._client.get(key)
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, bytes):
|
||||
return value.decode("utf-8", errors="replace")
|
||||
return value
|
||||
|
||||
async def set(self, key: str, value: str, ttl_seconds: int = 300) -> None:
|
||||
if not self._client:
|
||||
|
||||
@@ -86,4 +86,4 @@ class Settings(BaseSettings):
|
||||
return self
|
||||
|
||||
|
||||
settings = Settings()
|
||||
settings = Settings() # type: ignore[call-arg]
|
||||
|
||||
@@ -14,6 +14,7 @@ def _build_engine_kwargs() -> dict:
|
||||
kwargs.update(
|
||||
pool_size=10,
|
||||
max_overflow=20,
|
||||
pool_timeout=30,
|
||||
pool_pre_ping=True,
|
||||
pool_recycle=3600,
|
||||
)
|
||||
|
||||
@@ -25,6 +25,9 @@ logger = logging.getLogger(__name__)
|
||||
class RateLimitBackend(Protocol):
|
||||
"""Protocol for rate limit backends."""
|
||||
|
||||
max_requests: int
|
||||
window_seconds: int
|
||||
|
||||
async def is_allowed(self, key: str) -> tuple[bool, int, int]:
|
||||
"""Check if request is allowed. Returns (allowed, remaining, retry_after)."""
|
||||
|
||||
@@ -82,7 +85,8 @@ class RedisSlidingWindow:
|
||||
if current_count >= self.max_requests:
|
||||
oldest = await self.redis.zrange(key, 0, 0, withscores=True)
|
||||
if oldest:
|
||||
retry_after = int((oldest[0][1] - cutoff) / 1000) + 1
|
||||
oldest_score = float(oldest[0][1])
|
||||
retry_after = int((oldest_score - cutoff) / 1000) + 1
|
||||
else:
|
||||
retry_after = self.window_seconds
|
||||
return False, 0, retry_after
|
||||
@@ -114,6 +118,10 @@ if settings.rate_limit_redis_enabled:
|
||||
logger.warning("Failed to connect to Redis for rate limiting, using in-memory: %s", e)
|
||||
_use_redis = False
|
||||
|
||||
_public_limiter: RateLimitBackend
|
||||
_auth_limiter: RateLimitBackend
|
||||
_auth_strict_limiter: RateLimitBackend
|
||||
|
||||
if _use_redis and _redis_client:
|
||||
_public_limiter = RedisSlidingWindow(
|
||||
_redis_client, settings.rate_limit_requests, settings.rate_limit_window_seconds
|
||||
|
||||
@@ -1,16 +1,40 @@
|
||||
"""Health check and error metrics endpoints."""
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, status
|
||||
from sqlalchemy import text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
|
||||
from cartsnitch_api.auth.dependencies import verify_service_key
|
||||
from cartsnitch_api.database import get_db
|
||||
from cartsnitch_api.middleware.error_handler import get_error_monitor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(tags=["health"])
|
||||
|
||||
|
||||
@router.get("/health")
|
||||
async def health():
|
||||
return {"status": "ok"}
|
||||
async def health(db: AsyncSession = Depends(get_db)):
|
||||
"""Liveness + DB connectivity probe.
|
||||
|
||||
Returns HTTP 200 when the API process is responsive *and* the database
|
||||
is reachable, so Kubernetes readiness probes can correctly route traffic
|
||||
away from pods that have lost their database connection.
|
||||
|
||||
Returns HTTP 503 when the database is unreachable so K8s marks the pod
|
||||
unhealthy and stops sending traffic to it.
|
||||
"""
|
||||
try:
|
||||
await db.execute(text("SELECT 1"))
|
||||
except Exception as exc:
|
||||
logger.exception("Health check failed: database unreachable")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
||||
detail={"status": "unavailable", "database": "disconnected"},
|
||||
) from exc
|
||||
return {"status": "ok", "database": "connected"}
|
||||
|
||||
|
||||
@router.get("/internal/error-stats", dependencies=[Depends(verify_service_key)])
|
||||
|
||||
Reference in New Issue
Block a user