diff --git a/CHANGELOG.md b/CHANGELOG.md index 32b8514b404..381cbc831f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,12 +13,12 @@ - Fix issue where `$span` would not be recognized in Advanced Data Scrubbing. ([#781](https://github.com/getsentry/relay/pull/781)) - Accept big-endian minidumps. ([#789](https://github.com/getsentry/relay/pull/789)) - Detect network outages and retry sending events instead of dropping them. ([#788](https://github.com/getsentry/relay/pull/788)) -- Rate limit outcomes emitted only for events. ([#806](https://github.com/getsentry/relay/pull/806)) **Internal**: - Project states are now cached separately per DSN public key instead of per project ID. This means that there will be multiple separate cache entries for projects with more than one DSN. ([#778](https://github.com/getsentry/relay/pull/778)) - Relay no longer uses the Sentry endpoint to resolve project IDs by public key. Ingestion for the legacy store endpoint has been refactored to rely on key-based caches only. As a result, the legacy endpoint is supported only on managed Relays. ([#800](https://github.com/getsentry/relay/pull/800)) +- Fix rate limit outcomes, now emitted only for error events but not transactions. ([#806](https://github.com/getsentry/relay/pull/806), [#809](https://github.com/getsentry/relay/pull/809)) ## 20.9.0 diff --git a/relay-quotas/src/rate_limit.rs b/relay-quotas/src/rate_limit.rs index 10e4bfe2d3d..c32476441d6 100644 --- a/relay-quotas/src/rate_limit.rs +++ b/relay-quotas/src/rate_limit.rs @@ -286,6 +286,21 @@ impl RateLimits { pub fn longest(&self) -> Option<&RateLimit> { self.iter().max_by_key(|limit| limit.retry_after) } + + /// Returns the longest rate limit that is error related. + /// + /// The most relevant rate limit from the point of view of an error generating an outcome + /// is the longest rate limit for error messages. 
+ pub fn longest_error(&self) -> Option<&RateLimit> { + let is_event_related = |rate_limit: &&RateLimit| { + rate_limit.categories.is_empty() + || rate_limit.categories.iter().any(|cat| cat.is_error()) + }; + + self.iter() + .filter(is_event_related) + .max_by_key(|limit| limit.retry_after) + } } /// Immutable rate limits iterator. @@ -651,6 +666,68 @@ mod tests { "###); } + #[test] + fn test_rate_limits_longest_error_none() { + let mut rate_limits = RateLimits::new(); + + rate_limits.add(RateLimit { + categories: smallvec![DataCategory::Transaction], + scope: RateLimitScope::Organization(42), + reason_code: None, + retry_after: RetryAfter::from_secs(1), + }); + rate_limits.add(RateLimit { + categories: smallvec![DataCategory::Attachment], + scope: RateLimitScope::Organization(42), + reason_code: None, + retry_after: RetryAfter::from_secs(1), + }); + rate_limits.add(RateLimit { + categories: smallvec![DataCategory::Session], + scope: RateLimitScope::Organization(42), + reason_code: None, + retry_after: RetryAfter::from_secs(1), + }); + + // only non event rate limits so nothing relevant + assert_eq!(rate_limits.longest_error(), None) + } + + #[test] + fn test_rate_limits_longest_error() { + let mut rate_limits = RateLimits::new(); + rate_limits.add(RateLimit { + categories: smallvec![DataCategory::Transaction], + scope: RateLimitScope::Organization(40), + reason_code: None, + retry_after: RetryAfter::from_secs(100), + }); + rate_limits.add(RateLimit { + categories: smallvec![DataCategory::Error], + scope: RateLimitScope::Organization(41), + reason_code: None, + retry_after: RetryAfter::from_secs(5), + }); + rate_limits.add(RateLimit { + categories: smallvec![DataCategory::Error], + scope: RateLimitScope::Organization(42), + reason_code: None, + retry_after: RetryAfter::from_secs(7), + }); + + let rate_limit = rate_limits.longest_error().unwrap(); + insta::assert_ron_snapshot!(rate_limit, @r###" + RateLimit( + categories: [ + error, + ], + scope: Organization(42), + 
reason_code: None, + retry_after: RetryAfter(7), + ) + "###); + } + #[test] fn test_rate_limits_clean_expired() { let mut rate_limits = RateLimits::new(); diff --git a/relay-server/src/actors/events.rs b/relay-server/src/actors/events.rs index 7a2f23be2b7..eb0d3865d13 100644 --- a/relay-server/src/actors/events.rs +++ b/relay-server/src/actors/events.rs @@ -22,7 +22,7 @@ use relay_general::protocol::{ }; use relay_general::store::ClockDriftProcessor; use relay_general::types::{Annotated, Array, Object, ProcessingAction, Value}; -use relay_quotas::{RateLimit, RateLimits}; +use relay_quotas::RateLimits; use relay_redis::RedisPool; use crate::actors::outcome::{DiscardReason, Outcome, OutcomeProducer, TrackOutcome}; @@ -142,9 +142,9 @@ impl ProcessingError { Self::InvalidTransaction => Some(Outcome::Invalid(DiscardReason::InvalidTransaction)), Self::DuplicateItem(_) => Some(Outcome::Invalid(DiscardReason::DuplicateItem)), Self::NoEventPayload => Some(Outcome::Invalid(DiscardReason::NoEventPayload)), - Self::RateLimited(ref rate_limits) => { - most_relevant(rate_limits).map(|r| Outcome::RateLimited(r.reason_code.clone())) - } + Self::RateLimited(ref rate_limits) => rate_limits + .longest_error() + .map(|r| Outcome::RateLimited(r.reason_code.clone())), // Processing-only outcomes (Sentry-internal Relays) #[cfg(feature = "processing")] @@ -170,21 +170,6 @@ impl ProcessingError { } } -/// Returns the most relevant rate limit. 
-/// -/// The most relevant rate limit is the longest rate limit for events and if there is no -/// rate limit for events then the longest rate limit for anything else -fn most_relevant(rate_limits: &RateLimits) -> Option<&RateLimit> { - let is_event_related = |rate_limit: &&RateLimit| { - rate_limit.categories.is_empty() || rate_limit.categories.iter().any(|cat| cat.is_error()) - }; - - rate_limits - .iter() - .filter(is_event_related) - .max_by_key(|limit| limit.retry_after) -} - type ExtractedEvent = (Annotated<Event>, usize); /// A state container for envelope processing. @@ -1626,13 +1611,9 @@ impl Handler for EventManager { #[cfg(test)] mod tests { use super::*; - use smallvec::smallvec; use chrono::{DateTime, TimeZone, Utc}; - use relay_common::DataCategory; - use relay_quotas::{RateLimitScope, RetryAfter}; - fn create_breadcrumbs_item(breadcrumbs: &[(Option<DateTime<Utc>>, &str)]) -> Item { let mut data = Vec::new(); @@ -1774,62 +1755,4 @@ mod tests { // regression test to ensure we don't fail parsing an empty file result.expect("event_from_attachments"); } - - #[test] - /// Test that only rate limits related to events are returned - fn test_most_relevant_only_selects_event_rate_limits() { - let mut rate_limits = RateLimits::new(); - rate_limits.add(RateLimit { - categories: smallvec![DataCategory::Transaction], - scope: RateLimitScope::Organization(42), - reason_code: None, - retry_after: RetryAfter::from_secs(1), - }); - rate_limits.add(RateLimit { - categories: smallvec![DataCategory::Attachment], - scope: RateLimitScope::Organization(42), - reason_code: None, - retry_after: RetryAfter::from_secs(1), - }); - rate_limits.add(RateLimit { - categories: smallvec![DataCategory::Session], - scope: RateLimitScope::Organization(42), - reason_code: None, - retry_after: RetryAfter::from_secs(1), - }); - - // only non event rate limits so nothing relevant - assert_eq!(most_relevant(&rate_limits), None) - } - - #[test] - /// Test that the longest event related rate limit is returned - 
fn test_most_relevant_selects_longest_event_rate_limit() { - let mut rate_limits = RateLimits::new(); - rate_limits.add(RateLimit { - categories: smallvec![DataCategory::Transaction], - scope: RateLimitScope::Organization(40), - reason_code: None, - retry_after: RetryAfter::from_secs(100), - }); - rate_limits.add(RateLimit { - categories: smallvec![DataCategory::Error], - scope: RateLimitScope::Organization(41), - reason_code: None, - retry_after: RetryAfter::from_secs(5), - }); - let longest = RateLimit { - categories: smallvec![DataCategory::Error], - scope: RateLimitScope::Organization(42), - reason_code: None, - retry_after: RetryAfter::from_secs(7), - }; - rate_limits.add(longest); - - let limit = most_relevant(&rate_limits); - //we do have an event rate limit - assert!(limit.is_some()); - // the longest event rate limit is for org 42 - assert_eq!(limit.unwrap().scope, RateLimitScope::Organization(42)) - } } diff --git a/relay-server/src/endpoints/common.rs b/relay-server/src/endpoints/common.rs index 61ec06c121c..97456a24da5 100644 --- a/relay-server/src/endpoints/common.rs +++ b/relay-server/src/endpoints/common.rs @@ -79,8 +79,8 @@ pub enum BadStoreRequest { } impl BadStoreRequest { - fn to_outcome(&self) -> Outcome { - match self { + fn to_outcome(&self) -> Option<Outcome> { + Some(match self { BadStoreRequest::UnsupportedProtocolVersion(_) => { Outcome::Invalid(DiscardReason::AuthVersion) } @@ -119,16 +119,14 @@ impl BadStoreRequest { } BadStoreRequest::RateLimited(rate_limits) => { - let reason_code = rate_limits - .longest() - .and_then(|limit| limit.reason_code.clone()); - - Outcome::RateLimited(reason_code) + return rate_limits + .longest_error() + .map(|r| Outcome::RateLimited(r.reason_code.clone())); } // should actually never create an outcome BadStoreRequest::InvalidEventId => Outcome::Invalid(DiscardReason::Internal), - } + }) } } @@ -457,13 +455,15 @@ where metric!(counter(RelayCounters::EnvelopeRejected) += 1); if is_event { - 
outcome_producer.do_send(TrackOutcome { - timestamp: start_time, - scoping: *scoping.borrow(), - outcome: error.to_outcome(), - event_id: *event_id.borrow(), - remote_addr, - }); + if let Some(outcome) = error.to_outcome() { + outcome_producer.do_send(TrackOutcome { + timestamp: start_time, + scoping: *scoping.borrow(), + outcome, + event_id: *event_id.borrow(), + remote_addr, + }); + } } if !emit_rate_limit && matches!(error, BadStoreRequest::RateLimited(_)) { diff --git a/tests/integration/test_store.py b/tests/integration/test_store.py index 7a44d368c0f..4bcc283812b 100644 --- a/tests/integration/test_store.py +++ b/tests/integration/test_store.py @@ -550,7 +550,8 @@ def test_processing_quotas( retry_after2, rest = headers["x-sentry-rate-limits"].split(":", 1) assert int(retry_after2) == int(retry_after) assert rest == "%s:key" % category - outcomes_consumer.assert_rate_limited("get_lost", key_id=key_id) + if generates_outcomes: + outcomes_consumer.assert_rate_limited("get_lost", key_id=key_id) relay.dsn_public_key = second_key["publicKey"]