Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve schedule quality feature #1602

Merged
merged 17 commits into from
Mar 27, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
using URLs without a top-level domain ([1266](https://github.com/grafana/oncall/pull/1266))
- Updated wording when creating an integration ([1572](https://github.com/grafana/oncall/pull/1572))
- Set FCM iOS/Android "message priority" to "high priority" for mobile app push notifications ([1612](https://github.com/grafana/oncall/pull/1612))
- Improve schedule quality feature (by @vadimkerr in [#1602](https://github.com/grafana/oncall/pull/1602))

### Fixed

Expand Down
10 changes: 4 additions & 6 deletions engine/apps/api/views/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
from apps.auth_token.constants import SCHEDULE_EXPORT_TOKEN_NAME
from apps.auth_token.models import ScheduleExportAuthToken
from apps.schedules.models import OnCallSchedule
from apps.schedules.quality_score import get_schedule_quality_score
from apps.slack.models import SlackChannel
from apps.slack.tasks import update_slack_user_group_for_schedules
from common.api_helpers.exceptions import BadRequest, Conflict
Expand Down Expand Up @@ -353,13 +352,12 @@ def related_escalation_chains(self, request, pk):
@action(detail=True, methods=["get"])
def quality(self, request, pk):
# Detail-route GET action that responds with a quality report for the schedule
# returned by self.get_object().
# NOTE(review): this span is a rendered diff hunk whose +/- markers were lost, so the
# removed and the added implementation are interleaved below; the hunk header for this
# file reports "4 additions & 6 deletions" - confirm which lines survive against the
# merged file before treating this as runnable code.
schedule = self.get_object()
user_tz, date = self.get_request_timezone()
days = int(self.request.query_params.get("days", 90)) # todo: check if days could be calculated more precisely

events = schedule.filter_events(user_tz, date, days=days, with_empty=True, with_gap=True)
# only the date part of the request timezone tuple is used from here on
_, date = self.get_request_timezone()
# "days" is optional: a missing or empty query parameter is passed on as None
days = self.request.query_params.get("days")
days = int(days) if days else None

schedule_score = get_schedule_quality_score(events, days)
return Response(schedule_score)
# the report itself is computed by the schedule's quality_report(date, days)
return Response(schedule.quality_report(date, days))

@action(detail=False, methods=["get"])
def type_options(self, request):
Expand Down
147 changes: 147 additions & 0 deletions engine/apps/schedules/models/on_call_schedule.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import datetime
import functools
import itertools
from collections import defaultdict
from enum import Enum
from typing import Iterable, Optional, TypedDict

import icalendar
import pytz
Expand All @@ -23,9 +26,33 @@
list_of_oncall_shifts_from_ical,
)
from apps.schedules.models import CustomOnCallShift
from apps.user_management.models import User
from common.public_primary_keys import generate_public_primary_key, increase_public_primary_key_length


# Utility classes for schedule quality report
class QualityReportCommentType(str, Enum):
    """Kind of a comment in a schedule quality report: informational or a warning."""

    INFO = "info"
    WARNING = "warning"


class QualityReportComment(TypedDict):
    """One comment entry of a schedule quality report."""

    type: QualityReportCommentType  # whether the comment is informational or a warning
    text: str  # message text of the comment


class QualityReportOverloadedUser(TypedDict):
    """One entry of the "overloaded_users" list of a schedule quality report."""

    id: str  # user PK as it appears in the schedule events
    username: str  # username looked up for that PK ("unknown" when no user is found)
    score: int  # percent by which the user's total duration exceeds the average duration


class QualityReport(TypedDict):
    """Schedule quality report: an overall score plus the details explaining it."""

    total_score: int  # overall score, expressed in percent
    comments: list[QualityReportComment]  # info / warning messages about gaps and balance
    overloaded_users: list[QualityReportOverloadedUser]  # users above the average on-call duration


def generate_public_primary_key_for_oncall_schedule_channel():
prefix = "S"
new_public_primary_key = generate_public_primary_key(prefix)
Expand Down Expand Up @@ -256,6 +283,126 @@ def final_events(self, user_tz, starting_date, days):
events = self._resolve_schedule(events)
return events

def quality_report(self, date: Optional[timezone.datetime], days: Optional[int]) -> QualityReport:
    """
    Return schedule quality report to be used by the web UI.

    The report is built from the schedule's final events for the requested period and
    combines two scores (both in percent) with equal weight:

    - good event score: share of the period covered by events that are neither gaps nor empty;
    - balance score: how evenly the covered time is spread between users.

    :param date: start of the period; when None, next Monday (UTC) is used
    :param days: length of the period in days; when None, 52 weeks are used
    :return: dict with "total_score", "comments" and "overloaded_users" keys

    TODO: Add scores on "inside working hours" and "balance outside working hours" when
    TODO: working hours editor is implemented in the web UI.
    """
    # get events to consider for calculation
    if date is None:
        today = datetime.datetime.now(tz=datetime.timezone.utc)
        # weekday() is 0 on Monday, so adding (7 - weekday) days always lands on the next Monday
        date = today + datetime.timedelta(days=7 - today.weekday())  # start of next week in UTC
    if days is None:
        days = 52 * 7  # consider next 52 weeks (~1 year)

    events = self.final_events(user_tz="UTC", starting_date=date, days=days)

    # an event is "good" if it's not a gap and not empty
    good_events = [event for event in events if not event["is_gap"] and not event["is_empty"]]
    if not good_events:
        return {
            "total_score": 0,
            "comments": [{"type": QualityReportCommentType.WARNING, "text": "Schedule is empty"}],
            "overloaded_users": [],
        }

    def event_duration(ev: dict) -> datetime.timedelta:
        return ev["end"] - ev["start"]

    def timedelta_sum(deltas: Iterable[datetime.timedelta]) -> datetime.timedelta:
        return sum(deltas, start=datetime.timedelta())

    def score_to_percent(value: float) -> int:
        return round(value * 100)

    def get_duration_map(evs: list[dict]) -> dict[str, datetime.timedelta]:
        """Return a map of user PKs to total duration of events they are in."""
        result = defaultdict(datetime.timedelta)
        for ev in evs:
            duration = event_duration(ev)
            for user in ev["users"]:
                result[user["pk"]] += duration

        return result

    def get_balance_score_by_duration_map(dur_map: dict[str, datetime.timedelta]) -> float:
        """
        Return a score between 0 and 1, based on how balanced the durations are in the duration map.
        The formula is taken from https://github.com/grafana/oncall/issues/118#issuecomment-1161787854.
        """
        if len(dur_map) <= 1:
            return 1

        result = 0
        for duration_1, duration_2 in itertools.combinations(dur_map.values(), 2):
            shorter, longer = sorted((duration_1, duration_2))
            # two zero durations are equal, i.e. perfectly balanced; this also avoids dividing by zero
            result += shorter / longer if longer else 1

        number_of_pairs = len(dur_map) * (len(dur_map) - 1) // 2
        return result / number_of_pairs

    # calculate good event score (capped at 100%, the summed durations may exceed the period)
    good_events_duration = timedelta_sum(event_duration(event) for event in good_events)
    good_event_score = min(good_events_duration / datetime.timedelta(days=days), 1)
    good_event_score = score_to_percent(good_event_score)

    # calculate balance score
    duration_map = get_duration_map(good_events)
    balance_score = get_balance_score_by_duration_map(duration_map)
    balance_score = score_to_percent(balance_score)

    # calculate overloaded users
    if balance_score >= 95:  # tolerate minor imbalance
        balance_score = 100
        overloaded_users = []
    else:
        # a user is overloaded when their total duration is above the average one
        average_duration = timedelta_sum(duration_map.values()) / len(duration_map)
        overloaded_user_pks = [user_pk for user_pk, duration in duration_map.items() if duration > average_duration]
        usernames = {
            u.public_primary_key: u.username
            for u in User.objects.filter(public_primary_key__in=overloaded_user_pks).only(
                "public_primary_key", "username"
            )
        }
        overloaded_users = []
        for user_pk in overloaded_user_pks:
            # score is how many percent above the average duration the user is
            score = score_to_percent(duration_map[user_pk] / average_duration) - 100
            username = usernames.get(user_pk) or "unknown"  # fallback to "unknown" if user is not found
            overloaded_users.append({"id": user_pk, "username": username, "score": score})

        # show most overloaded users first
        overloaded_users.sort(key=lambda u: (-u["score"], u["username"]))

    # generate comments regarding gaps
    comments = []
    if good_event_score == 100:
        comments.append({"type": QualityReportCommentType.INFO, "text": "Schedule has no gaps"})
    else:
        not_covered = 100 - good_event_score
        comments.append(
            {"type": QualityReportCommentType.WARNING, "text": f"Schedule has gaps ({not_covered}% not covered)"}
        )

    # generate comments regarding balance
    if balance_score == 100:
        comments.append({"type": QualityReportCommentType.INFO, "text": "Schedule is perfectly balanced"})
    else:
        comments.append(
            {"type": QualityReportCommentType.WARNING, "text": "Schedule has balance issues (see overloaded users)"}
        )

    # calculate total score (plain average of good event score and balance score)
    total_score = round((good_event_score + balance_score) / 2)

    return {
        "total_score": total_score,
        "comments": comments,
        "overloaded_users": overloaded_users,
    }

def _resolve_schedule(self, events):
"""Calculate final schedule shifts considering rotations and overrides."""
if not events:
Expand Down
117 changes: 0 additions & 117 deletions engine/apps/schedules/quality_score.py

This file was deleted.

Loading