From d1af86dd1c9d7995c4b26d8147a759b0781dc97e Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 17 Nov 2021 12:40:28 +0100 Subject: [PATCH 1/3] feat(metrics): Another statsd metric to measure bucket duplication [INGEST-421] Add a set metric that measures the number of unique bucket keys observed at a point/interval in time. This requires us to implement some way of hashing bucket keys such that statsd can consume them. BucketKey already implements Hash to be used in hashmaps. We cannot however use the std hasher: * docs for DefaultHasher explicitly state that the hashes may change across Rust releases (I guess this would not be too much of a blocker for our purpose, but it's annoying to keep in mind) * SipHasher is deprecated (but could probably be used) We already have crc32fast in our dependency tree, so let's depend on it explicitly and just use that. There's still one caveat in that the impls of Hash may call different methods depending on CPU architecture (as stated in https://docs.rs/deterministic-hash/1.0.1/deterministic_hash/), but I think we can live with that for now. 
--- Cargo.lock | 7 ++++--- relay-metrics/Cargo.toml | 1 + relay-metrics/src/aggregation.rs | 26 +++++++++++++++++++++++++- relay-metrics/src/statsd.rs | 22 +++++++++++++++++++++- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6c149c29ee1..7967ea7fabd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -613,11 +613,11 @@ checksum = "338089f42c427b86394a5ee60ff321da23a5c89c9d89514c829687b26359fcff" [[package]] name = "crc32fast" -version = "1.2.0" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", ] [[package]] @@ -3250,6 +3250,7 @@ name = "relay-metrics" version = "21.11.0" dependencies = [ "actix", + "crc32fast", "criterion", "failure", "float-ord", diff --git a/relay-metrics/Cargo.toml b/relay-metrics/Cargo.toml index abe6244a097..a14692c46c8 100644 --- a/relay-metrics/Cargo.toml +++ b/relay-metrics/Cargo.toml @@ -19,6 +19,7 @@ relay-statsd = { path = "../relay-statsd" } serde = { version = "1.0.114", features = ["derive"] } serde_json = "1.0.55" failure = "0.1.8" +crc32fast = "1.2.1" [dev-dependencies] criterion = "0.3" diff --git a/relay-metrics/src/aggregation.rs b/relay-metrics/src/aggregation.rs index b3c49f859b0..f8543534ff0 100644 --- a/relay-metrics/src/aggregation.rs +++ b/relay-metrics/src/aggregation.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use relay_common::{MonotonicResult, ProjectKey, UnixTimestamp}; -use crate::statsd::{MetricCounters, MetricGauges, MetricHistograms, MetricTimers}; +use crate::statsd::{MetricCounters, MetricGauges, MetricHistograms, MetricSets, MetricTimers}; use crate::{Metric, MetricType, MetricUnit, MetricValue}; /// A snapshot of values within a [`Bucket`]. 
@@ -706,6 +706,24 @@ struct BucketKey { tags: BTreeMap, } +impl BucketKey { + /// An extremely hamfisted way to hash a bucket key into an integer. + /// + /// This is necessary for (and probably only useful for) reporting unique bucket keys in a + /// cadence set metric, as cadence set metrics can only be constructed from values that + /// implement [`cadence::ext::ToSetValue`]. This type is only implemented for [`i64`], and + /// while we could implement it directly for [`BucketKey`] the documentation advises us not to + /// interact with this trait. + /// + fn as_integer_lossy(&self) -> i64 { + // XXX: The way this hasher is used may be platform-dependent. If we want to produce the + // same hash across platforms, the `deterministic_hash` crate may be useful. + let mut hasher = crc32fast::Hasher::new(); + std::hash::Hash::hash(self, &mut hasher); + hasher.finalize() as i64 + } +} + /// Parameters used by the [`Aggregator`]. #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(default)] @@ -996,6 +1014,12 @@ impl Aggregator { metric_type = entry.key().metric_type.as_str(), metric_name = &entry.key().metric_name ); + relay_statsd::metric!( + set(MetricSets::UniqueBucketsCreated) = entry.key().as_integer_lossy(), + metric_type = entry.key().metric_type.as_str(), + metric_name = &entry.key().metric_name + ); + let flush_at = self.config.get_flush_time(timestamp); entry.insert(QueuedBucket::new(flush_at, value.into())); } diff --git a/relay-metrics/src/statsd.rs b/relay-metrics/src/statsd.rs index f3bd26a943a..9f2bbf0e9fc 100644 --- a/relay-metrics/src/statsd.rs +++ b/relay-metrics/src/statsd.rs @@ -1,4 +1,24 @@ -use relay_statsd::{CounterMetric, GaugeMetric, HistogramMetric, TimerMetric}; +use relay_statsd::{CounterMetric, GaugeMetric, HistogramMetric, SetMetric, TimerMetric}; + +pub enum MetricSets { + /// Count the number of unique buckets created. + /// + /// This is a set of bucketing keys. 
The metric is basically equivalent to + /// `metrics.buckets.merge.miss` for a single Relay, but could be useful to determine how much + /// duplicate buckets there are when multiple instances are running. + /// + /// The hashing is platform-dependent at the moment, so all your relays that send this metric + /// should run on the same CPU architecture, otherwise this metric is not reliable. + UniqueBucketsCreated, +} + +impl SetMetric for MetricSets { + fn name(&self) -> &'static str { + match *self { + Self::UniqueBucketsCreated => "metrics.buckets.created.unique", + } + } +} /// Counter metrics for Relay Metrics. pub enum MetricCounters { From 2cfb00fca672889e5bcde941dc9e72f60779a0ed Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 17 Nov 2021 15:39:12 +0100 Subject: [PATCH 2/3] type -> trait --- relay-metrics/src/aggregation.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/relay-metrics/src/aggregation.rs b/relay-metrics/src/aggregation.rs index f8543534ff0..ee4be486da1 100644 --- a/relay-metrics/src/aggregation.rs +++ b/relay-metrics/src/aggregation.rs @@ -711,7 +711,7 @@ impl BucketKey { /// /// This is necessary for (and probably only useful for) reporting unique bucket keys in a /// cadence set metric, as cadence set metrics can only be constructed from values that - /// implement [`cadence::ext::ToSetValue`]. This type is only implemented for [`i64`], and + /// implement [`cadence::ext::ToSetValue`]. This trait is only implemented for [`i64`], and /// while we could implement it directly for [`BucketKey`] the documentation advises us not to /// interact with this trait. 
/// From e3cd9c82871aacfd895f51929bd865025669435c Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 17 Nov 2021 16:02:01 +0100 Subject: [PATCH 3/3] add changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a874cfeae8..d1d9a8ed068 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +**Internal**: + +- Another statsd metric to measure bucket duplication. ([#1128](https://github.com/getsentry/relay/pull/1128)) + ## 21.11.0 **Features**: