Skip to content

Commit 42cf889

Browse files
authored
Merge pull request #11 from gnir-work/feature/add-loop-lag-metric
Feature/add loop lag metric
2 parents 8f57aec + 326cd27 commit 42cf889

File tree

15 files changed

+840
-690
lines changed

15 files changed

+840
-690
lines changed

.github/workflows/test_and_lint_package.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
fail-fast: true
1515
matrix:
1616
os: [ "ubuntu-latest" ]
17-
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
17+
python-version: [ "3.9", "3.10", "3.11", "3.12" ]
1818
runs-on: ${{ matrix.os }}
1919
steps:
2020
- name: Check out repository

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -158,4 +158,6 @@ cython_debug/
158158
# and can be added to the global gitignore or merged into this file. For a more nuclear
159159
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
160160
.idea/
161-
junit/
161+
junit/
162+
163+
.DS_Store

Makefile

+4-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,7 @@ lint: lint-formatters lint-static-code-analysis
2020
pre-commit: format lint-static-code-analysis
2121

2222
test:
23-
poetry run pytest --cov=monitored_ioloop -n auto tests
23+
poetry run pytest --cov=monitored_ioloop -n auto tests
24+
25+
test-for-flakiness:
26+
poetry run pytest --cov=monitored_ioloop -n auto --count=10

README.md

+9-6
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ The usage is the same as the asyncio event loop, but with `monitored_ioloop.moni
4848
## The monitor callback
4949
The monitor callback will be called for every execution that the event loop initiates.
5050
With every call you will receive an [IoLoopMonitorState](monitored_ioloop/monitoring.py) object that contains the following information:
51-
- `wall_loop_duration`: The time it took to execute the current step of the event loop in wall time.
52-
- `cpu_loop_duration`: The time it took to execute the current step of the event loop in CPU time.
53-
- `handles_count`: The amount of handles (think about them as tasks) that the IO loop is currently handling.
51+
- `callback_wall_time`: Wall executing time of the callback.
52+
- `loop_handles_count`: The amount of handles (think about them as tasks) that the IO loop is currently handling.
53+
- `loop_lag`: The amount of time it took from the moment the task was added to the loop until it was executed.
5454

5555
## Performance impact
5656
As many of you might be concerned about the performance impact of this library, I have run some benchmarks to measure the performance impact of this library.
@@ -65,11 +65,14 @@ Currently there is only the [fastapi with prometheus exporter example](examples/
6565
## Roadmap
6666
- [x] Add support for the amount of `Handle`'s on the event loop
6767
- [x] Add an examples folder
68+
- [x] Add loop lag metric (Inspired from nodejs loop monitoring)
6869
- [ ] Add visibility into which `Handle` are making the event loop slower
69-
- [ ] Add easier integration with popular monitoring tools like Prometheus
7070
- [ ] Add easier integration with `uvicorn`
71+
- [ ] Add easier integration with popular monitoring tools like Prometheus
72+
7173

7274

7375
## Credits
74-
I took a lot of inspiration from the [uvloop](https://github.com/MagicStack/uvloop) project with everythin
75-
regarding the user interface of swapping the IO loop.
76+
* I took a lot of inspiration from the [uvloop](https://github.com/MagicStack/uvloop) project with everything
77+
regarding the user interface of swapping the IO loop.
78+
* The great pycon talk - https://www.youtube.com/watch?v=GSiZkP7cI80&t=16s

examples/fastapi_with_prometheus/server.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ async def blocking_slow(sleep_for: int) -> str:
3434

3535

3636
def monitor_ioloop(ioloop_monitor_state: IoLoopMonitorState) -> None:
37-
ioloop_execution_time_histogram.observe(ioloop_monitor_state.wall_loop_duration)
37+
ioloop_execution_time_histogram.observe(ioloop_monitor_state.callback_wall_time)
3838

3939

4040
def main() -> None:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
from monitored_ioloop.monitored_asyncio import MonitoredAsyncIOEventLoopPolicy
2+
from monitored_ioloop.monitoring import IoLoopMonitorState
3+
import asyncio
4+
import logging
5+
import time
6+
7+
logger = logging.getLogger("simple_python_example")
8+
9+
10+
async def main() -> None:
11+
async def sleep(coroutine_id: int) -> None:
12+
logger.info(f"[id: {coroutine_id}] Before non-blocking sleep")
13+
await asyncio.sleep(1)
14+
logger.info(f"[id: {coroutine_id}] After non-blocking sleep")
15+
16+
async def blocking_sleep(coroutine_id: int) -> None:
17+
logger.info(f"[id: {coroutine_id}] Before blocking sleep")
18+
time.sleep(1)
19+
logger.info(f"[id: {coroutine_id}] After blocking sleep")
20+
21+
await asyncio.gather(
22+
sleep(coroutine_id=0),
23+
blocking_sleep(coroutine_id=1),
24+
sleep(coroutine_id=2),
25+
sleep(coroutine_id=3),
26+
blocking_sleep(coroutine_id=4),
27+
sleep(coroutine_id=5),
28+
)
29+
30+
31+
def monitor(ioloop_monitor_state: IoLoopMonitorState) -> None:
32+
if ioloop_monitor_state.callback_wall_time > 0.1:
33+
logger.warning(
34+
f"Blocking operation detected, executing took: {ioloop_monitor_state.callback_wall_time}"
35+
)
36+
if ioloop_monitor_state.loop_lag > 0.1:
37+
logger.warning(
38+
f"A task was executed after a significant delay: {ioloop_monitor_state.loop_lag}"
39+
)
40+
41+
42+
if __name__ == "__main__":
43+
logging.basicConfig(
44+
level=logging.INFO,
45+
datefmt="[%d-%b-%y %H:%M:%S]",
46+
format="%(asctime)s %(message)s",
47+
)
48+
logger.info("starting")
49+
asyncio.set_event_loop_policy(
50+
MonitoredAsyncIOEventLoopPolicy(monitor_callback=monitor)
51+
)
52+
logger.info("Set event loop")
53+
asyncio.run(main())

monitored_ioloop/monioted_ioloop_base.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@ def __init__(
1616
# as abstract in typeshed, we have to put them in so mypy thinks
1717
# the base methods are overridden. This is the same approach taken
1818
# for the Windows event loop policy classes in typeshed.
19-
def get_child_watcher(self) -> typing.NoReturn:
20-
...
19+
def get_child_watcher(self) -> typing.NoReturn: ...
2120

22-
def set_child_watcher(self, watcher: typing.Any) -> typing.NoReturn:
23-
...
21+
def set_child_watcher(self, watcher: typing.Any) -> typing.NoReturn: ...

monitored_ioloop/monitored_asyncio.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def __init__(
2424
def call_soon(
2525
self,
2626
callback: typing.Callable[..., typing.Any],
27-
*args: typing.Any,
27+
*args: typing.Any, # type: ignore
2828
**kwargs: typing.Any,
2929
) -> Handle:
3030
callback_with_monitoring = wrap_callback_with_monitoring(

monitored_ioloop/monitored_uvloop.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def __init__(
3131
def call_soon(
3232
self,
3333
callback: typing.Callable[..., typing.Any],
34-
*args: typing.Any,
34+
*args: typing.Any, # type: ignore
3535
**kwargs: typing.Any,
3636
) -> Handle:
3737
callback_with_monitoring = wrap_callback_with_monitoring(

monitored_ioloop/monitoring.py

+35-13
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,34 @@
99

1010
@dataclass
1111
class IoLoopMonitorState:
12-
# The time it took to execute the loop in wall time (For example asyncio.sleep will also be recorded).
13-
wall_loop_duration: float
14-
# The time it took to execute the loop in cpu time (For example asyncio.sleep will not be recorded).
15-
cpu_loop_duration: float
16-
# The amount of handles in the loop - https://docs.python.org/3/library/asyncio-eventloop.html#callback-handles
17-
handles_count: int
12+
"""
13+
A dataclass containing the state of the loop when the callback was executed.
14+
This class is the interface that the monitor callback will receive.
15+
16+
A basic Lexicon:
17+
* Handle - A wrapper for a callback that is scheduled to be executed by the loop.
18+
* Callback - The function that is executed by the loop.
19+
* Loop - The event loop that is executing the callbacks.
20+
"""
21+
22+
"""
23+
Wall executing time of the callback
24+
It can be the whole coroutine or parts of it, depending on if the executing control
25+
was delegated back the loop or not.
26+
27+
Wall Time explanation - https://en.wikipedia.org/wiki/Wall-clock_time
28+
"""
29+
callback_wall_time: float
30+
31+
"""
32+
The amount of handles in the loop, excluding the current one.
33+
"""
34+
loop_handles_count: int
35+
36+
"""
37+
The amount of time it took from the moment the coroutine was added to the loop until it was executed.
38+
"""
39+
loop_lag: float
1840

1941

2042
@dataclass
@@ -48,21 +70,21 @@ def wrap_callback_with_monitoring(
4870
back to the monitor_callback.
4971
"""
5072
ioloop_state.increase_handles_count_thread_safe(1)
73+
added_to_loop_time = time.perf_counter()
5174

5275
def wrapper(*inner_args: typing.Any, **inner_kwargs: typing.Any) -> typing.Any:
53-
start_wall_time = time.time()
54-
start_cpu_time = time.process_time()
76+
loop_lag = time.perf_counter() - added_to_loop_time
77+
start_wall_time = time.perf_counter()
5578
response = callback(*inner_args, **inner_kwargs)
5679
ioloop_state.decrease_handles_count_thread_safe(1)
57-
wall_duration = time.time() - start_wall_time
58-
cpu_duration = time.process_time() - start_cpu_time
80+
wall_duration = time.perf_counter() - start_wall_time
5981

6082
try:
6183
monitor_callback(
6284
IoLoopMonitorState(
63-
wall_loop_duration=wall_duration,
64-
cpu_loop_duration=cpu_duration,
65-
handles_count=ioloop_state.handles_count,
85+
callback_wall_time=wall_duration,
86+
loop_handles_count=ioloop_state.handles_count,
87+
loop_lag=loop_lag,
6688
)
6789
)
6890
except Exception:

0 commit comments

Comments
 (0)