Merge pull request #12 from gnir-work/feature/add-blocking-handle-name-to-monitoring

gnir-work · web-flow · commit 4bd27d50dc15 · 2024-07-12T23:39:09.000+03:00
Add pretty handle name to monitored result
diff --git a/README.md b/README.md
@@ -51,6 +51,8 @@ With every call you will receive an [IoLoopMonitorState](monitored_ioloop/monito
 - `callback_wall_time`: Wall executing time of the callback.
 - `loop_handles_count`: The amount of handles (think about them as tasks) that the IO loop is currently handling.
 - `loop_lag`: The amount of time it took from the moment the task was added to the loop until it was executed.
+- `callback_pretty_name`: The pretty name of the callback that was executed.  
+__Please Note__: This is a best-effort value; the name of the callback may still be of little help, depending on the specific callback implementation.
 
 ## Performance impact
 As many of you might be concerned about the performance impact of this library, I have run some benchmarks to measure the performance impact of this library.  
@@ -66,7 +68,7 @@ Currently there is only the [fastapi with prometheus exporter example](examples/
 - [x] Add support for the amount of `Handle`'s on the event loop
 - [x] Add an examples folder
 - [x] Add loop lag metric (Inspired from nodejs loop monitoring)
-- [ ] Add visibility into which `Handle` are making the event loop slower
+- [x] Add visibility into which `Handle` are making the event loop slower
 - [ ] Add easier integration with `uvicorn`
 - [ ] Add easier integration with popular monitoring tools like Prometheus
 
diff --git a/examples/simple_python_example/simple_python_example.py b/examples/simple_python_example/simple_python_example.py
@@ -8,7 +8,7 @@
 
 
 async def main() -> None:
-    async def sleep(coroutine_id: int) -> None:
+    async def non_blocking_sleep(coroutine_id: int) -> None:
         logger.info(f"[id: {coroutine_id}] Before non-blocking sleep")
         await asyncio.sleep(1)
         logger.info(f"[id: {coroutine_id}] After non-blocking sleep")
@@ -19,23 +19,25 @@ async def blocking_sleep(coroutine_id: int) -> None:
         logger.info(f"[id: {coroutine_id}] After blocking sleep")
 
     await asyncio.gather(
-        sleep(coroutine_id=0),
+        non_blocking_sleep(coroutine_id=0),
         blocking_sleep(coroutine_id=1),
-        sleep(coroutine_id=2),
-        sleep(coroutine_id=3),
+        non_blocking_sleep(coroutine_id=2),
+        non_blocking_sleep(coroutine_id=3),
         blocking_sleep(coroutine_id=4),
-        sleep(coroutine_id=5),
+        non_blocking_sleep(coroutine_id=5),
     )
 
 
 def monitor(ioloop_monitor_state: IoLoopMonitorState) -> None:
     if ioloop_monitor_state.callback_wall_time > 0.1:
         logger.warning(
-            f"Blocking operation detected, executing took: {ioloop_monitor_state.callback_wall_time}"
+            f"Blocking operation detected, executing of {ioloop_monitor_state.callback_pretty_name} "
+            f"took: {ioloop_monitor_state.callback_wall_time}"
         )
     if ioloop_monitor_state.loop_lag > 0.1:
         logger.warning(
-            f"A task was executed after a significant delay: {ioloop_monitor_state.loop_lag}"
+            f"Task {ioloop_monitor_state.callback_pretty_name} was executed after "
+            f"a significant delay: {ioloop_monitor_state.loop_lag}"
         )
 
 
diff --git a/monitored_ioloop/formatting_utils.py b/monitored_ioloop/formatting_utils.py
@@ -0,0 +1,20 @@
+import typing
+from asyncio import tasks
+from asyncio.events import Handle
+
+
+def pretty_format_handle(handle: Handle) -> str:
+    callback = handle._callback  # type: ignore
+    if isinstance(getattr(callback, "__self__", None), tasks.Task):
+        # format the task
+        return repr(callback.__self__)
+    else:
+        return repr(handle)
+
+
+def pretty_callback_name(callback: typing.Callable[..., typing.Any]) -> str:
+    if isinstance(getattr(callback, "__self__", None), tasks.Task):
+        # format the task
+        return repr(callback.__self__)  # type: ignore
+    else:
+        return repr(callback)
diff --git a/monitored_ioloop/monitored_asyncio.py b/monitored_ioloop/monitored_asyncio.py
@@ -31,7 +31,9 @@ def call_soon(
             callback, self._monitor_callback, self._state
         )
 
-        return super().call_soon(callback_with_monitoring, *args, **kwargs)
+        handle = super().call_soon(callback_with_monitoring, *args, **kwargs)
+        callback_with_monitoring.set_handle(handle)
+        return handle
 
 
 class MonitoredAsyncIOEventLoopPolicy(BaseMonitoredEventLoopPolicy):
diff --git a/monitored_ioloop/monitoring.py b/monitored_ioloop/monitoring.py
@@ -1,9 +1,12 @@
 import threading
 import time
 import typing
+from asyncio import Handle
 from dataclasses import dataclass
 from logging import getLogger
 
+from monitored_ioloop.formatting_utils import pretty_format_handle, pretty_callback_name
+
 logger = getLogger(__name__)
 
 
@@ -28,6 +31,12 @@ class IoLoopMonitorState:
     """
     callback_wall_time: float
 
+    """
+    A best effort try to give a meaningful name to the callback that was currently executed.
+    This property will come in handy when trying to debug callbacks with high wall time. 
+    """
+    callback_pretty_name: str
+
     """
     The amount of handles in the loop, excluding the current one.
     """
@@ -59,36 +68,55 @@ def decrease_handles_count_thread_safe(self, decrease_by: int) -> None:
             self.handles_count -= decrease_by
 
 
-def wrap_callback_with_monitoring(
-    callback: typing.Callable[..., typing.Any],
-    monitor_callback: typing.Callable[[IoLoopMonitorState], None],
-    ioloop_state: IoLoopInnerState,
-) -> typing.Callable[..., typing.Any]:
-    """
-    Add monitoring to a callback.
-    The callback will be wrapped in a function that will monitor the callbacks execution time and report
-    back to the monitor_callback.
-    """
-    ioloop_state.increase_handles_count_thread_safe(1)
-    added_to_loop_time = time.perf_counter()
-
-    def wrapper(*inner_args: typing.Any, **inner_kwargs: typing.Any) -> typing.Any:
-        loop_lag = time.perf_counter() - added_to_loop_time
+class MonitoredCallbackWrapper:
+    def __init__(
+        self,
+        callback: typing.Callable[..., typing.Any],
+        monitor_callback: typing.Callable[[IoLoopMonitorState], None],
+        io_loop_state: IoLoopInnerState,
+    ):
+        self._original_callback = callback
+        self._monitor_callback = monitor_callback
+        self._ioloop_state = io_loop_state
+        self._added_to_loop_time = time.perf_counter()
+        self._handle: typing.Optional[Handle] = None
+
+    def set_handle(self, handle: Handle) -> None:
+        self._handle = handle
+
+    def __call__(self, *args: typing.Any, **kwargs: typing.Any) -> typing.Any:
+        loop_lag = time.perf_counter() - self._added_to_loop_time
         start_wall_time = time.perf_counter()
-        response = callback(*inner_args, **inner_kwargs)
-        ioloop_state.decrease_handles_count_thread_safe(1)
+        response = self._original_callback(*args, **kwargs)
+        self._ioloop_state.decrease_handles_count_thread_safe(1)
         wall_duration = time.perf_counter() - start_wall_time
 
         try:
-            monitor_callback(
+            pretty_name = (
+                pretty_format_handle(self._handle)
+                if self._handle
+                else pretty_callback_name(self._original_callback)
+            )
+            self._monitor_callback(
                 IoLoopMonitorState(
                     callback_wall_time=wall_duration,
-                    loop_handles_count=ioloop_state.handles_count,
+                    loop_handles_count=self._ioloop_state.handles_count,
                     loop_lag=loop_lag,
+                    callback_pretty_name=pretty_name,
                 )
             )
         except Exception:
             logger.warning("Monitor callback failed.", exc_info=True)
         return response
 
-    return wrapper
+    def __getattr__(self, item: str) -> typing.Any:
+        return getattr(self._original_callback, item)
+
+
+def wrap_callback_with_monitoring(
+    callback: typing.Callable[..., typing.Any],
+    monitor_callback: typing.Callable[[IoLoopMonitorState], None],
+    ioloop_state: IoLoopInnerState,
+) -> MonitoredCallbackWrapper:
+    ioloop_state.increase_handles_count_thread_safe(1)
+    return MonitoredCallbackWrapper(callback, monitor_callback, ioloop_state)
diff --git a/stress_tests/results/README.md b/stress_tests/results/README.md
@@ -7,10 +7,10 @@ The architecture of system tests was
 
 ### Tests
 #### 300 active locust users
-Under the stress of 300 users which resulted in ~70 requests per second there was
+Under the stress of 300 users which resulted in ~70 requests __per second__ there was
 no visible difference in the response time between monitored and vanilla loops.
 
 #### 1000 active locust users
-Under the stress of 1000 users which resulted in ~220 requests per second there was
-a 5-10% increase in response time when observing the 90th - 100th percentile of longest requests.  
+Under the stress of 1000 users which resulted in ~220 requests __per second__ there was
+a 5~7% increase in response time when observing the 90th - 100th percentile of longest requests.  
 requests under the 90th percentile were not affected by the monitoring loop.
diff --git a/tests/test_asyncio_profiler.py b/tests/test_asyncio_profiler.py
@@ -213,3 +213,93 @@ def test_loop_lag(
         )
         == non_blocking_coroutines_count
     )
+
+
+@pytest.mark.parametrize(
+    "ioloop_policy_class",
+    [MonitoredAsyncIOEventLoopPolicy, MonitoredUvloopEventLoopPolicy],
+)
+def test_callback_pretty_name__basic_top_level_coroutine_name(
+    ioloop_policy_class: typing.Type[MonitoredUvloopEventLoopPolicy],
+) -> None:
+    mock = Mock()
+    asyncio.set_event_loop_policy(ioloop_policy_class(monitor_callback=mock))
+    asyncio.run(non_cpu_intensive_blocking_coroutine(0.1))
+    assert (
+        len(
+            [
+                callback_pretty_name
+                for call in mock.mock_calls
+                if "non_cpu_intensive_blocking_coroutine"
+                in (callback_pretty_name := call.args[0].callback_pretty_name)
+            ]
+        )
+        == 1
+    )
+
+
+async def several_coroutines_in_gather_with_pretty_name_testing() -> None:
+    async def first_function() -> None:
+        time.sleep(0.1)
+
+    async def second_function() -> None:
+        time.sleep(0.1)
+
+    await asyncio.gather(
+        first_function(),
+        first_function(),
+        second_function(),
+    )
+
+
+@pytest.mark.parametrize(
+    "ioloop_policy_class",
+    [MonitoredAsyncIOEventLoopPolicy, MonitoredUvloopEventLoopPolicy],
+)
+def test_callback_pretty_name__several_coroutines_with_gather(
+    ioloop_policy_class: typing.Type[MonitoredUvloopEventLoopPolicy],
+) -> None:
+    mock = Mock()
+    asyncio.set_event_loop_policy(ioloop_policy_class(monitor_callback=mock))
+    asyncio.run(several_coroutines_in_gather_with_pretty_name_testing())
+
+    # The function is called twice from the ioloop, once until the gather and once after the gather has finished
+    assert (
+        len(
+            [
+                callback_pretty_name
+                for call in mock.mock_calls
+                if (
+                    "several_coroutines_in_gather_with_pretty_name_testing"
+                    in (callback_pretty_name := call.args[0].callback_pretty_name)
+                    and "first_function" not in callback_pretty_name
+                    and "second_function" not in callback_pretty_name
+                )
+            ]
+        )
+        == 2
+    )
+
+    assert (
+        len(
+            [
+                callback_pretty_name
+                for call in mock.mock_calls
+                if "first_function"
+                in (callback_pretty_name := call.args[0].callback_pretty_name)
+            ]
+        )
+        == 2
+    )
+
+    assert (
+        len(
+            [
+                callback_pretty_name
+                for call in mock.mock_calls
+                if "second_function"
+                in (callback_pretty_name := call.args[0].callback_pretty_name)
+            ]
+        )
+        == 1
+    )

Original file line number	Diff line number	Diff line change
`@@ -31,7 +31,9 @@ def call_soon(`
`31`	`31`	`callback, self._monitor_callback, self._state`
`32`	`32`	`)`
`33`	`33`
`34`		`- return super().call_soon(callback_with_monitoring, *args, **kwargs)`
	`34`	`+ handle = super().call_soon(callback_with_monitoring, *args, **kwargs)`
	`35`	`+ callback_with_monitoring.set_handle(handle)`
	`36`	`+ return handle`
`35`	`37`
`36`	`38`
`37`	`39`	`class MonitoredAsyncIOEventLoopPolicy(BaseMonitoredEventLoopPolicy):`