Skip to content

Commit a936c8c

Browse files
authored
Improve schedule quality feature (#1602)
# What this PR does Before: <img width="281" alt="Screenshot 2023-03-23 at 16 56 42" src="https://user-images.githubusercontent.com/20116910/227279464-c883ec05-a964-4360-bda2-3443409ca90a.png"> After: <img width="338" alt="Screenshot 2023-03-23 at 16 57 41" src="https://user-images.githubusercontent.com/20116910/227279476-468bffba-922a-45ea-b400-5f34d6bf0534.png"> - Add scores for overloaded users, e.g. `(+25% avg)` which means the user is scheduled to be on-call 25% more than average for given schedule. - Add score for gaps, e.g. `Schedule has gaps (29% not covered)` which means 29% of time no one is scheduled to be on-call. - Make things easier to understand when there are gaps in the schedule, add `(see overloaded users)` text. - Consider events for next 52 weeks (~1 year) instead of 90 days (~3 months), so the quality report is more accurate. Also treat any balance quality >95% as perfectly balanced. These two changes (period change and adding 95% threshold) should help eliminate false positives for _most_ schedules. - Modify backend & frontend so the backend returns all necessary user information to render without using the user store. - Move quality report generation to `OnCallSchedule` model, add more tests. ## Which issue(s) this PR fixes Related to #1552 ## Checklist - [x] Tests updated - [x] `CHANGELOG.md` updated (public docs will be added in a separate PR)
1 parent a2caeae commit a936c8c

File tree

10 files changed

+430
-179
lines changed

10 files changed

+430
-179
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414
using URLs without a top-level domain ([1266](https://github.com/grafana/oncall/pull/1266))
1515
- Updated wording when creating an integration ([1572](https://github.com/grafana/oncall/pull/1572))
1616
- Set FCM iOS/Android "message priority" to "high priority" for mobile app push notifications ([1612](https://github.com/grafana/oncall/pull/1612))
17+
- Improve schedule quality feature (by @vadimkerr in [#1602](https://github.com/grafana/oncall/pull/1602))
1718

1819
### Fixed
1920

engine/apps/api/views/schedule.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
from apps.auth_token.constants import SCHEDULE_EXPORT_TOKEN_NAME
2828
from apps.auth_token.models import ScheduleExportAuthToken
2929
from apps.schedules.models import OnCallSchedule
30-
from apps.schedules.quality_score import get_schedule_quality_score
3130
from apps.slack.models import SlackChannel
3231
from apps.slack.tasks import update_slack_user_group_for_schedules
3332
from common.api_helpers.exceptions import BadRequest, Conflict
@@ -353,13 +352,12 @@ def related_escalation_chains(self, request, pk):
353352
@action(detail=True, methods=["get"])
354353
def quality(self, request, pk):
355354
schedule = self.get_object()
356-
user_tz, date = self.get_request_timezone()
357-
days = int(self.request.query_params.get("days", 90)) # todo: check if days could be calculated more precisely
358355

359-
events = schedule.filter_events(user_tz, date, days=days, with_empty=True, with_gap=True)
356+
_, date = self.get_request_timezone()
357+
days = self.request.query_params.get("days")
358+
days = int(days) if days else None
360359

361-
schedule_score = get_schedule_quality_score(events, days)
362-
return Response(schedule_score)
360+
return Response(schedule.quality_report(date, days))
363361

364362
@action(detail=False, methods=["get"])
365363
def type_options(self, request):

engine/apps/schedules/models/on_call_schedule.py

+147
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
import datetime
22
import functools
33
import itertools
4+
from collections import defaultdict
5+
from enum import Enum
6+
from typing import Iterable, Optional, TypedDict
47

58
import icalendar
69
import pytz
@@ -23,9 +26,33 @@
2326
list_of_oncall_shifts_from_ical,
2427
)
2528
from apps.schedules.models import CustomOnCallShift
29+
from apps.user_management.models import User
2630
from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length
2731

2832

33+
# Utility classes for schedule quality report
34+
class QualityReportCommentType(str, Enum):
35+
INFO = "info"
36+
WARNING = "warning"
37+
38+
39+
class QualityReportComment(TypedDict):
40+
type: QualityReportCommentType
41+
text: str
42+
43+
44+
class QualityReportOverloadedUser(TypedDict):
45+
id: str
46+
username: str
47+
score: int
48+
49+
50+
class QualityReport(TypedDict):
51+
total_score: int
52+
comments: list[QualityReportComment]
53+
overloaded_users: list[QualityReportOverloadedUser]
54+
55+
2956
def generate_public_primary_key_for_oncall_schedule_channel():
3057
prefix = "S"
3158
new_public_primary_key = generate_public_primary_key(prefix)
@@ -256,6 +283,126 @@ def final_events(self, user_tz, starting_date, days):
256283
events = self._resolve_schedule(events)
257284
return events
258285

286+
def quality_report(self, date: Optional[timezone.datetime], days: Optional[int]) -> QualityReport:
287+
"""
288+
Return schedule quality report to be used by the web UI.
289+
TODO: Add scores on "inside working hours" and "balance outside working hours" when
290+
TODO: working hours editor is implemented in the web UI.
291+
"""
292+
# get events to consider for calculation
293+
if date is None:
294+
today = datetime.datetime.now(tz=datetime.timezone.utc)
295+
date = today - datetime.timedelta(days=7 - today.weekday()) # start of next week in UTC
296+
if days is None:
297+
days = 52 * 7 # consider next 52 weeks (~1 year)
298+
299+
events = self.final_events(user_tz="UTC", starting_date=date, days=days)
300+
301+
# an event is “good” if it's not a gap and not empty
302+
good_events = [event for event in events if not event["is_gap"] and not event["is_empty"]]
303+
if not good_events:
304+
return {
305+
"total_score": 0,
306+
"comments": [{"type": QualityReportCommentType.WARNING, "text": "Schedule is empty"}],
307+
"overloaded_users": [],
308+
}
309+
310+
def event_duration(ev: dict) -> datetime.timedelta:
311+
return ev["end"] - ev["start"]
312+
313+
def timedelta_sum(deltas: Iterable[datetime.timedelta]) -> datetime.timedelta:
314+
return sum(deltas, start=datetime.timedelta())
315+
316+
def score_to_percent(value: float) -> int:
317+
return round(value * 100)
318+
319+
def get_duration_map(evs: list[dict]) -> dict[str, datetime.timedelta]:
320+
"""Return a map of user PKs to total duration of events they are in."""
321+
result = defaultdict(datetime.timedelta)
322+
for ev in evs:
323+
for user in ev["users"]:
324+
user_pk = user["pk"]
325+
result[user_pk] += event_duration(ev)
326+
327+
return result
328+
329+
def get_balance_score_by_duration_map(dur_map: dict[str, datetime.timedelta]) -> float:
330+
"""
331+
Return a score between 0 and 1, based on how balanced the durations are in the duration map.
332+
The formula is taken from https://github.com/grafana/oncall/issues/118#issuecomment-1161787854.
333+
"""
334+
if len(dur_map) <= 1:
335+
return 1
336+
337+
result = 0
338+
for key_1, key_2 in itertools.combinations(dur_map, 2):
339+
duration_1 = dur_map[key_1]
340+
duration_2 = dur_map[key_2]
341+
342+
result += min(duration_1, duration_2) / max(duration_1, duration_2)
343+
344+
number_of_pairs = len(dur_map) * (len(dur_map) - 1) // 2
345+
return result / number_of_pairs
346+
347+
# calculate good event score
348+
good_events_duration = timedelta_sum(event_duration(event) for event in good_events)
349+
good_event_score = min(good_events_duration / datetime.timedelta(days=days), 1)
350+
good_event_score = score_to_percent(good_event_score)
351+
352+
# calculate balance score
353+
duration_map = get_duration_map(good_events)
354+
balance_score = get_balance_score_by_duration_map(duration_map)
355+
balance_score = score_to_percent(balance_score)
356+
357+
# calculate overloaded users
358+
if balance_score >= 95: # tolerate minor imbalance
359+
balance_score = 100
360+
overloaded_users = []
361+
else:
362+
average_duration = timedelta_sum(duration_map.values()) / len(duration_map)
363+
overloaded_user_pks = [user_pk for user_pk, duration in duration_map.items() if duration > average_duration]
364+
usernames = {
365+
u.public_primary_key: u.username
366+
for u in User.objects.filter(public_primary_key__in=overloaded_user_pks).only(
367+
"public_primary_key", "username"
368+
)
369+
}
370+
overloaded_users = []
371+
for user_pk in overloaded_user_pks:
372+
score = score_to_percent(duration_map[user_pk] / average_duration) - 100
373+
username = usernames.get(user_pk) or "unknown" # fallback to "unknown" if user is not found
374+
overloaded_users.append({"id": user_pk, "username": username, "score": score})
375+
376+
# show most overloaded users first
377+
overloaded_users.sort(key=lambda u: (-u["score"], u["username"]))
378+
379+
# generate comments regarding gaps
380+
comments = []
381+
if good_event_score == 100:
382+
comments.append({"type": QualityReportCommentType.INFO, "text": "Schedule has no gaps"})
383+
else:
384+
not_covered = 100 - good_event_score
385+
comments.append(
386+
{"type": QualityReportCommentType.WARNING, "text": f"Schedule has gaps ({not_covered}% not covered)"}
387+
)
388+
389+
# generate comments regarding balance
390+
if balance_score == 100:
391+
comments.append({"type": QualityReportCommentType.INFO, "text": "Schedule is perfectly balanced"})
392+
else:
393+
comments.append(
394+
{"type": QualityReportCommentType.WARNING, "text": "Schedule has balance issues (see overloaded users)"}
395+
)
396+
397+
# calculate total score (weighted sum of good event score and balance score)
398+
total_score = round((good_event_score + balance_score) / 2)
399+
400+
return {
401+
"total_score": total_score,
402+
"comments": comments,
403+
"overloaded_users": overloaded_users,
404+
}
405+
259406
def _resolve_schedule(self, events):
260407
"""Calculate final schedule shifts considering rotations and overrides."""
261408
if not events:

engine/apps/schedules/quality_score.py

-117
This file was deleted.

0 commit comments

Comments
 (0)