@@ -101,9 +101,7 @@ def clear_samples(namespace: ClustererNamespace, project: Project) -> None:
101
101
102
102
103
103
def record_transaction_name (project : Project , event_data : Mapping [str , Any ], ** kwargs : Any ) -> None :
104
- transaction_name = event_data .get ("transaction" )
105
-
106
- if transaction_name and _should_store_transaction_name (event_data ):
104
+ if transaction_name := _should_store_transaction_name (event_data ):
107
105
safe_execute (
108
106
_record_sample ,
109
107
ClustererNamespace .TRANSACTIONS ,
@@ -116,28 +114,41 @@ def record_transaction_name(project: Project, event_data: Mapping[str, Any], **k
116
114
safe_execute (_bump_rule_lifetime , project , event_data , _with_transaction = False )
117
115
118
116
119
- def _should_store_transaction_name (event_data : Mapping [str , Any ]) -> bool :
117
+ def _should_store_transaction_name (event_data : Mapping [str , Any ]) -> Optional [ str ] :
120
118
"""Returns whether the given event must be stored as input for the
121
119
transaction clusterer."""
122
- tags = event_data .get ("tags" )
120
+ transaction_name = event_data .get ("transaction" )
121
+ if not transaction_name :
122
+ return None
123
+
124
+ tags = event_data .get ("tags" ) or {}
123
125
transaction_info = event_data .get ("transaction_info" ) or {}
124
126
source = transaction_info .get ("source" )
125
127
126
- # For now, we also feed back transactions into the clustering algorithm
128
+ # We also feed back transactions into the clustering algorithm
127
129
# that have already been sanitized, so we have a chance to discover
128
130
# more high cardinality segments after partial sanitation.
129
131
# For example, we may have sanitized `/orgs/*/projects/foo`,
130
132
# But the clusterer has yet to discover `/orgs/*/projects/*`.
131
133
#
132
134
# Disadvantage: the load on redis does not decrease over time.
133
135
#
134
- if source not in (TRANSACTION_SOURCE_URL , TRANSACTION_SOURCE_SANITIZED ):
135
- return False
136
+ source_matches = source in (TRANSACTION_SOURCE_URL , TRANSACTION_SOURCE_SANITIZED ) or (
137
+ # Relay leaves source None if it expects it to be high cardinality, (otherwise it sets it to "unknown")
138
+ # (see https://github.com/getsentry/relay/blob/2d07bef86415cc0ae8af01d16baecde10cdb23a6/relay-general/src/store/transactions/processor.rs#L369-L373).
139
+ #
140
+ # Our data shows that a majority of these `None` source transactions contain slashes, so treat them as URL transactions:
141
+ source is None
142
+ and "/" in transaction_name
143
+ )
144
+
145
+ if not source_matches :
146
+ return None
136
147
137
148
if tags and HTTP_404_TAG in tags :
138
- return False
149
+ return None
139
150
140
- return True
151
+ return transaction_name
141
152
142
153
143
154
def _bump_rule_lifetime (project : Project , event_data : Mapping [str , Any ]) -> None :
0 commit comments