fix health check#1322
Conversation
There was a problem hiding this comment.
Code Review
This pull request refactors the health check mechanism from an active probe request model to a passive, on-the-fly check. The new logic determines health based on whether the last successful inference occurred within a grace timeout, or if the shared memory request manager is currently idle. The review feedback highlights two important issues: a potential AttributeError during server startup if the HTTP server manager is not yet fully initialized, and import-time side effects caused by initializing the SharedInt class attribute when the module is first imported rather than lazily.
| health_task = asyncio.create_task(health_check(g_objs.args, g_objs.httpserver_manager, None)) | ||
| if not health_obj.is_health(): | ||
| await health_task | ||
| is_healthy = health_check(g_objs.httpserver_manager.shm_req_manager) |
There was a problem hiding this comment.
If the server is still starting up or fails to initialize, g_objs.httpserver_manager may be None. Calling g_objs.httpserver_manager.shm_req_manager directly will raise an AttributeError, resulting in a 500 Internal Server Error instead of a clean 503 Service Unavailable response.
Adding a defensive check to ensure g_objs.httpserver_manager and its shm_req_manager are fully initialized before invoking the health check prevents crashes during startup probes.
if g_objs.httpserver_manager is None or getattr(g_objs.httpserver_manager, "shm_req_manager", None) is None:
return JSONResponse({"message": "Error"}, status_code=503)
is_healthy = health_check(g_objs.httpserver_manager.shm_req_manager)| class HealthObj: | ||
| _is_health: bool = False | ||
| _is_health_checking: bool = False | ||
| _failure_count: int = 0 | ||
| _failure_threshold: int = int(os.getenv("HEALTH_FAILURE_THRESHOLD", 3)) | ||
| timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100)) | ||
| dynamic_timeout: int = int(os.getenv("HEALTH_TIMEOUT", 100)) | ||
| grace_timeout: int = int(os.getenv("HEALTH_TIMEOUT", "200")) | ||
| latest_success_infer_time_mark = SharedInt(f"{get_unique_server_name()}_latest_success_infer_time_mark") | ||
|
|
||
| def begin_check(self): | ||
| self._is_health_checking = True | ||
|
|
||
| def end_check(self): | ||
| self._is_health_checking = False | ||
|
|
||
| def set_unhealth(self): | ||
| self._failure_count += 1 | ||
| self.dynamic_timeout += self.timeout | ||
| if self._failure_count > self._failure_threshold: | ||
| self._is_health = False | ||
|
|
||
| def set_health(self): | ||
| self._is_health = True | ||
| self._failure_count = 0 | ||
| self.dynamic_timeout = self.timeout | ||
|
|
||
| def is_health(self): | ||
| return self._is_health | ||
|
|
||
| def is_checking(self): | ||
| return self._is_health_checking | ||
|
|
||
| def has_latest_inference(self): | ||
| last_timemark = self.latest_success_infer_time_mark.get_value() | ||
| time_diff = time.time() - last_timemark | ||
| return time_diff < self.timeout | ||
| def check(self, shm_req_manager: "ShmReqManager") -> bool: | ||
| """On-the-fly health check: recent success is ok; otherwise require no in-flight shm requests.""" | ||
| try: | ||
| now = time.time() | ||
| last_success_time = self.latest_success_infer_time_mark.get_value() |
There was a problem hiding this comment.
Initializing latest_success_infer_time_mark as a class attribute at module import time can lead to several issues:
- Import-time Side Effects:
get_unique_server_name()is called immediately when the module is imported. If the environment variables or command-line arguments that determine the unique server name are not fully parsed or set yet, it may resolve to an incorrect or default name, causing the health check to look at a different shared memory segment than the actual server manager. - Multiprocessing/Uvicorn Reload Issues: Instantiating shared memory resources at import time can cause file descriptor leaks or permission issues when Uvicorn forks worker processes or reloads modules.
It is much safer to lazily initialize the SharedInt resource when the check method is first invoked.
class HealthObj:
grace_timeout: int = int(os.getenv("HEALTH_TIMEOUT", "200"))
def __post_init__(self):
self._latest_success_infer_time_mark = None
def check(self, shm_req_manager: "ShmReqManager") -> bool:
"""On-the-fly health check: recent success is ok; otherwise require no in-flight shm requests."""
try:
if self._latest_success_infer_time_mark is None:
self._latest_success_infer_time_mark = SharedInt(f"{get_unique_server_name()}_latest_success_infer_time_mark")
now = time.time()
last_success_time = self._latest_success_infer_time_mark.get_value()
No description provided.