Skip to content

Commit eeca4a5

Browse files
authored
feat(ddm): Add transaction.source and improve implementation of parsing (#61244)
1 parent 11582bc commit eeca4a5

File tree

2 files changed

+40
-23
lines changed

2 files changed

+40
-23
lines changed

src/sentry/snuba/metrics/extraction.py

+36-20
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
"http.method": "request.method",
8383
"http.url": "request.url",
8484
"http.referer": "request.headers.Referer",
85+
"transaction.source": "transaction.source",
8586
# url is a tag extracted by Sentry itself, on Relay it's received as `request.url`
8687
"url": "request.url",
8788
"sdk.name": "sdk.name",
@@ -238,11 +239,11 @@
238239
"browser.name",
239240
"os.name",
240241
"geo.country_code",
241-
# These are skipped during on demand spec generation and will not be converted to metric extraction conditions
242-
"event.type",
243-
"project",
244242
]
245243

244+
# Query fields that we do not consider for the extraction since they are not needed.
245+
_BLACKLISTED_METRIC_FIELDS = ["event.type", "project"]
246+
246247
# Operators used in ``ComparingRuleCondition``.
247248
CompareOp = Literal["eq", "gt", "gte", "lt", "lte", "glob"]
248249

@@ -356,12 +357,22 @@ def _transform_search_query(query: Sequence[QueryToken]) -> Sequence[QueryToken]
356357
return transformed_query
357358

358359

359-
def _parse_search_query(
    query: Optional[str], removed_blacklisted: bool = False
) -> Sequence[QueryToken]:
    """
    Parses a search query with the discover grammar and performs some transformations on the AST in order to account for
    edge cases.
    """
    parsed = cast(Sequence[QueryToken], event_search.parse_search_query(query))

    # First apply the basic AST transformations that normalize edge cases.
    transformed = _transform_search_query(parsed)
    if not removed_blacklisted:
        return transformed

    # If requested, strip blacklisted filters and re-normalize the remaining query.
    return _cleanup_query(_remove_blacklisted_search_filters(transformed))
365376

366377

367378
@dataclass(frozen=True)
@@ -528,7 +539,7 @@ def _get_groupbys_support(groupbys: Sequence[str]) -> SupportedBy:
528539

529540
def _get_query_supported_by(query: Optional[str]) -> SupportedBy:
530541
try:
531-
parsed_query = _parse_search_query(query)
542+
parsed_query = _parse_search_query(query=query, removed_blacklisted=False)
532543

533544
standard_metrics = _is_standard_metrics_query(parsed_query)
534545
on_demand_metrics = _is_on_demand_supported_query(parsed_query)
@@ -543,7 +554,6 @@ def _is_standard_metrics_query(tokens: Sequence[QueryToken]) -> bool:
543554
"""
544555
Recursively checks if any of the supplied token contain search filters that can't be handled by standard metrics.
545556
"""
546-
547557
for token in tokens:
548558
if not _is_standard_metrics_search_filter(token):
549559
return False
@@ -565,7 +575,6 @@ def _is_on_demand_supported_query(tokens: Sequence[QueryToken]) -> bool:
565575
"""
566576
Recursively checks if any of the supplied token contain search filters that can't be handled by standard metrics.
567577
"""
568-
569578
for token in tokens:
570579
if not _is_on_demand_supported_search_filter(token):
571580
return False
@@ -621,6 +630,10 @@ def _is_standard_metrics_search_term(field: str) -> bool:
621630

622631

623632
def _is_on_demand_supported_field(field: str) -> bool:
633+
# If it's a blacklisted field, we consider it as compatible with on demand.
634+
if field in _BLACKLISTED_METRIC_FIELDS:
635+
return True
636+
624637
try:
625638
_map_field_name(field)
626639
return True
@@ -646,7 +659,7 @@ def to_standard_metrics_query(query: str) -> str:
646659
"transaction.duration:>=1s AND browser.version:1" -> ""
647660
"""
648661
try:
649-
tokens = _parse_search_query(query)
662+
tokens = _parse_search_query(query=query, removed_blacklisted=False)
650663
except InvalidSearchQuery:
651664
logger.exception("Failed to parse search query: %s", query)
652665
raise
@@ -661,7 +674,7 @@ def to_standard_metrics_tokens(tokens: Sequence[QueryToken]) -> Sequence[QueryTo
661674
that has all on-demand filters removed and can be run using only standard metrics.
662675
"""
663676
remaining_tokens = _remove_on_demand_search_filters(tokens)
664-
cleaned_query = cleanup_query(remaining_tokens)
677+
cleaned_query = _cleanup_query(remaining_tokens)
665678
return cleaned_query
666679

667680

@@ -680,7 +693,7 @@ def query_tokens_to_string(tokens: Sequence[QueryToken]) -> str:
680693

681694
def _remove_on_demand_search_filters(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
682695
"""
683-
removes tokens that contain filters that can only be handled by on demand metrics.
696+
Removes tokens that contain filters that can only be handled by on demand metrics.
684697
"""
685698
ret_val: List[QueryToken] = []
686699
for token in tokens:
@@ -691,26 +704,28 @@ def _remove_on_demand_search_filters(tokens: Sequence[QueryToken]) -> Sequence[Q
691704
ret_val.append(ParenExpression(_remove_on_demand_search_filters(token.children)))
692705
else:
693706
ret_val.append(token)
707+
694708
return ret_val
695709

696710

697-
def _remove_blacklisted_search_filters(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
    """
    Removes tokens that contain filters that are blacklisted.
    """
    kept: List[QueryToken] = []
    for item in tokens:
        if isinstance(item, ParenExpression):
            # Recurse into parenthesized sub-queries, keeping the wrapper.
            kept.append(ParenExpression(_remove_blacklisted_search_filters(item.children)))
        elif isinstance(item, SearchFilter) and item.key.name in _BLACKLISTED_METRIC_FIELDS:
            # Drop filters on blacklisted fields entirely.
            continue
        else:
            kept.append(item)

    return kept
711726

712727

713-
def cleanup_query(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
728+
def _cleanup_query(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
714729
"""
715730
Recreates a valid query from an original query that has had on demand search filters removed.
716731
@@ -729,9 +744,10 @@ def cleanup_query(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
729744
if not isinstance(token, ParenExpression):
730745
removed_empty_parens.append(token)
731746
else:
732-
children = cleanup_query(token.children)
747+
children = _cleanup_query(token.children)
733748
if len(children) > 0:
734749
removed_empty_parens.append(ParenExpression(children))
750+
735751
# remove AND and OR operators at the start of the query
736752
while len(removed_empty_parens) > 0 and isinstance(removed_empty_parens[0], str):
737753
removed_empty_parens.pop(0)
@@ -761,6 +777,7 @@ def cleanup_query(tokens: Sequence[QueryToken]) -> Sequence[QueryToken]:
761777
elif previous_token is not None:
762778
ret_val.append(previous_token)
763779
previous_token = token
780+
764781
# take care of the last token (if any)
765782
if previous_token is not None:
766783
ret_val.append(previous_token)
@@ -1249,9 +1266,8 @@ def _parse_field(value: str) -> FieldParsingResult:
12491266
def _parse_query(value: str) -> QueryParsingResult:
    """Parse query string into our internal AST format.

    Blacklisted filters (e.g. ``event.type``, ``project``) are stripped from the
    resulting conditions, since they are not needed for metric extraction.

    Raises:
        Exception: if ``value`` is not a valid search query.
    """
    try:
        conditions = _parse_search_query(query=value, removed_blacklisted=True)
        return QueryParsingResult(conditions=conditions)
    except InvalidSearchQuery as e:
        # Chain the original error explicitly (`from e`) so the root cause is
        # preserved in the traceback instead of relying on implicit context.
        raise Exception(f"Invalid search query '{value}' in on demand spec: {e}") from e
12571273

tests/sentry/snuba/test_extraction.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
from sentry.snuba.metrics.extraction import (
88
OnDemandMetricSpec,
99
SearchQueryConverter,
10+
_cleanup_query,
1011
apdex_tag_spec,
11-
cleanup_query,
1212
failure_tag_spec,
1313
query_tokens_to_string,
1414
should_use_on_demand_metrics,
@@ -144,6 +144,7 @@ class TestCreatesOndemandMetricSpec:
144144
"",
145145
), # apdex with specified threshold is on-demand metric even without query
146146
("count()", "transaction.duration:>0 my-transaction"),
147+
("count()", "transaction.source:route"),
147148
],
148149
)
149150
def test_creates_on_demand_spec(self, aggregate, query):
@@ -643,7 +644,7 @@ def test_query_tokens_to_string(query):
643644
def test_cleanup_query(dirty, clean):
644645
dirty_tokens = parse_search_query(dirty)
645646
clean_tokens = parse_search_query(clean)
646-
actual_clean = cleanup_query(dirty_tokens)
647+
actual_clean = _cleanup_query(dirty_tokens)
647648

648649
assert actual_clean == clean_tokens
649650

@@ -662,7 +663,7 @@ def test_cleanup_query_with_empty_parens():
662663
+ [paren([paren([paren(["AND", "OR", paren([])])])])] # ((()))
663664
)
664665
clean_tokens = parse_search_query("release:initial AND os.name:android")
665-
actual_clean = cleanup_query(dirty_tokens)
666+
actual_clean = _cleanup_query(dirty_tokens)
666667
assert actual_clean == clean_tokens
667668

668669

0 commit comments

Comments
 (0)