From 3aa4991f63c6c68ecf432680242cb05f4bd7ba9e Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 21 May 2024 11:45:03 +0200 Subject: [PATCH] ref(ds): Low cardinality outcome reason (#3623) Reduce noise in outcome reasons by mapping ranges of dynamic sampling rule IDs together, and ordering them. That is ```python "123,1004,1500,1403,1404,1000" # becomes "1000,1004,1400,1500,?" ``` --- CHANGELOG.md | 6 ++ relay-server/src/services/outcome.rs | 98 ++++++++++++++++++- .../services/processor/dynamic_sampling.rs | 2 +- relay-server/src/services/processor/report.rs | 46 +++++++-- .../src/services/processor/span/processing.rs | 4 +- tests/integration/test_dynamic_sampling.py | 6 +- tests/integration/test_outcome.py | 24 ++--- tests/integration/test_spans.py | 7 +- 8 files changed, 164 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b833a09901..d6ff75b3da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## Unreleased + +**Internal**: + +- Map outcome reasons for dynamic sampling to reduced set of values. ([#3623](https://github.com/getsentry/relay/pull/3623)) + ## 24.5.0 **Breaking Changes**: diff --git a/relay-server/src/services/outcome.rs b/relay-server/src/services/outcome.rs index 3ebedea859..20caa8ed94 100644 --- a/relay-server/src/services/outcome.rs +++ b/relay-server/src/services/outcome.rs @@ -5,7 +5,7 @@ //! pipeline, outcomes may not be emitted if the item is accepted. use std::borrow::Cow; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::error::Error; use std::net::IpAddr; use std::sync::Arc; @@ -24,6 +24,7 @@ use relay_filter::FilterStatKey; #[cfg(feature = "processing")] use relay_kafka::{ClientError, KafkaClient, KafkaTopic}; use relay_quotas::{DataCategory, ReasonCode, Scoping}; +use relay_sampling::config::RuleId; use relay_sampling::evaluation::MatchedRuleIds; use relay_statsd::metric; use relay_system::{Addr, FromMessage, Interface, NoResponse, Service}; @@ -164,7 +165,7 @@ pub enum Outcome { Filtered(FilterStatKey), /// The event has been filtered by a Sampling Rule - FilteredSampling(MatchedRuleIds), + FilteredSampling(RuleCategories), /// The event has been rate limited. RateLimited(Option), @@ -251,6 +252,79 @@ impl fmt::Display for Outcome { } } +/// A lower-cardinality version of [`RuleId`]. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum RuleCategory { + BoostLowVolumeProjects, + BoostEnvironments, + IgnoreHealthChecks, + BoostKeyTransactions, + Recalibration, + BoostReplayId, + BoostLowVolumeTransactions, + BoostLatestReleases, + Custom, + Other, +} + +impl RuleCategory { + fn as_str(&self) -> &'static str { + match self { + Self::BoostLowVolumeProjects => "1000", + Self::BoostEnvironments => "1001", + Self::IgnoreHealthChecks => "1002", + Self::BoostKeyTransactions => "1003", + Self::Recalibration => "1004", + Self::BoostReplayId => "1005", + Self::BoostLowVolumeTransactions => "1400", + Self::BoostLatestReleases => "1500", + Self::Custom => "3000", + Self::Other => "0", + } + } +} + +impl From for RuleCategory { + fn from(value: RuleId) -> Self { + match value.0 { + 1000 => Self::BoostLowVolumeProjects, + 1001 => Self::BoostEnvironments, + 1002 => Self::IgnoreHealthChecks, + 1003 => Self::BoostKeyTransactions, + 1004 => Self::Recalibration, + 1005 => Self::BoostReplayId, + 1400..=1499 => Self::BoostLowVolumeTransactions, + 1500..=1599 => Self::BoostLatestReleases, + 3000..=4999 => Self::Custom, + _ => Self::Other, + } + } +} + +/// An ordered set of categories that can be used as outcome reason. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct RuleCategories(pub BTreeSet); + +impl fmt::Display for RuleCategories { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for (i, c) in self.0.iter().enumerate() { + if i > 0 { + write!(f, ",")?; + } + write!(f, "{}", c.as_str())?; + } + Ok(()) + } +} + +impl From for RuleCategories { + fn from(value: MatchedRuleIds) -> Self { + RuleCategories(BTreeSet::from_iter( + value.0.into_iter().map(RuleCategory::from), + )) + } +} + /// Reason for a discarded invalid event. /// /// Used in `Outcome::Invalid`. Synchronize overlap with Sentry. @@ -971,3 +1045,23 @@ impl Service for OutcomeProducerService { }); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn rule_category_roundtrip() { + let input = "123,1004,1500,1403,1403,1404,1000"; + let rule_ids = MatchedRuleIds::parse(input).unwrap(); + let rule_categories = RuleCategories::from(rule_ids); + + let serialized = rule_categories.to_string(); + assert_eq!(&serialized, "1000,1004,1400,1500,0"); + + assert_eq!( + MatchedRuleIds::parse(&serialized).unwrap(), + MatchedRuleIds([1000, 1004, 1400, 1500, 0].map(RuleId).into()) + ); + } +} diff --git a/relay-server/src/services/processor/dynamic_sampling.rs b/relay-server/src/services/processor/dynamic_sampling.rs index 8f712ad034..843e0d3ca2 100644 --- a/relay-server/src/services/processor/dynamic_sampling.rs +++ b/relay-server/src/services/processor/dynamic_sampling.rs @@ -115,7 +115,7 @@ pub fn sample_envelope_items( let unsampled_profiles_enabled = forward_unsampled_profiles(state, global_config); let matched_rules = sampling_match.into_matched_rules(); - let outcome = Outcome::FilteredSampling(matched_rules.clone()); + let outcome = Outcome::FilteredSampling(matched_rules.into()); state.managed_envelope.retain_items(|item| { if unsampled_profiles_enabled && item.ty() == &ItemType::Profile { item.set_sampled(false); diff --git a/relay-server/src/services/processor/report.rs b/relay-server/src/services/processor/report.rs index 620816c4c7..2f229d28b0 100644 --- a/relay-server/src/services/processor/report.rs +++ b/relay-server/src/services/processor/report.rs @@ -14,7 +14,7 @@ use relay_sampling::evaluation::MatchedRuleIds; use relay_system::Addr; use crate::envelope::{ContentType, ItemType}; -use crate::services::outcome::{Outcome, TrackOutcome}; +use crate::services::outcome::{Outcome, RuleCategories, TrackOutcome}; use crate::services::processor::{ClientReportGroup, ProcessEnvelopeState, MINIMUM_CLOCK_DRIFT}; use crate::utils::ItemAction; @@ -240,6 +240,7 @@ fn outcome_from_parts(field: ClientReportField, reason: &str) -> Result match reason.strip_prefix("Sampled:") { Some(rule_ids) => MatchedRuleIds::parse(rule_ids) + .map(RuleCategories::from) .map(Outcome::FilteredSampling) .map_err(|_| ()), None => Err(()), @@ -261,11 +262,11 @@ mod tests { use std::sync::Arc; use relay_event_schema::protocol::EventId; - use relay_sampling::config::RuleId; use relay_sampling::evaluation::ReservoirCounters; use crate::envelope::{Envelope, Item}; use crate::extractors::RequestMeta; + use crate::services::outcome::RuleCategory; use crate::services::processor::{ProcessEnvelope, ProcessingGroup}; use crate::services::project::ProjectState; use crate::testutils::{self, create_test_processor}; @@ -559,15 +560,46 @@ mod tests { assert_eq!( outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123,456"), - Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![ - RuleId(123), - RuleId(456), - ]))) + Ok(Outcome::FilteredSampling(RuleCategories( + [RuleCategory::Other].into() + ))) ); assert_eq!( outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"), - Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![RuleId(123)]))) + Ok(Outcome::FilteredSampling(RuleCategories( + [RuleCategory::Other].into() + ))) + ); + + assert_eq!( + outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"), + Ok(Outcome::FilteredSampling(RuleCategories( + [RuleCategory::Other].into() + ))) + ); + + assert_eq!( + outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:1001"), + Ok(Outcome::FilteredSampling(RuleCategories( + [RuleCategory::BoostEnvironments].into() + ))) + ); + + assert_eq!( + outcome_from_parts( + ClientReportField::FilteredSampling, + "Sampled:1001,1456,1567,3333,4444" + ), + Ok(Outcome::FilteredSampling(RuleCategories( + [ + RuleCategory::BoostEnvironments, + RuleCategory::BoostLowVolumeTransactions, + RuleCategory::BoostLatestReleases, + RuleCategory::Custom + ] + .into() + ))) ); } diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 0836ba47bf..16f2ae8c36 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -25,7 +25,7 @@ use relay_spans::{otel_to_sentry_span, otel_trace::Span as OtelSpan}; use crate::envelope::{ContentType, Envelope, Item, ItemType}; use crate::metrics_extraction::generic::extract_metrics; -use crate::services::outcome::{DiscardReason, Outcome}; +use crate::services::outcome::{DiscardReason, Outcome, RuleCategories}; use crate::services::processor::span::extract_transaction_span; use crate::services::processor::{ dynamic_sampling, Addrs, ProcessEnvelope, ProcessEnvelopeState, ProcessingError, @@ -53,7 +53,7 @@ pub fn process( // once for all spans in the envelope. let sampling_outcome = match dynamic_sampling::run(state, &config) { SamplingResult::Match(sampling_match) if sampling_match.should_drop() => Some( - Outcome::FilteredSampling(sampling_match.into_matched_rules()), + Outcome::FilteredSampling(RuleCategories::from(sampling_match.into_matched_rules())), ), _ => None, }; diff --git a/tests/integration/test_dynamic_sampling.py b/tests/integration/test_dynamic_sampling.py index d1826e9417..34bf3397b4 100644 --- a/tests/integration/test_dynamic_sampling.py +++ b/tests/integration/test_dynamic_sampling.py @@ -238,7 +238,7 @@ def test_it_removes_events(mini_sentry, relay): public_key = config["publicKeys"][0]["publicKey"] # add a sampling rule to project config that removes all transactions (sample_rate=0) - rules = _add_sampling_config(config, sample_rate=0, rule_type="transaction") + _add_sampling_config(config, sample_rate=0, rule_type="transaction") # create an envelope with a trace context that is initiated by this project (for simplicity) envelope, trace_id, event_id = _create_transaction_envelope(public_key) @@ -253,7 +253,7 @@ def test_it_removes_events(mini_sentry, relay): assert outcomes is not None outcome = outcomes["outcomes"][0] assert outcome.get("outcome") == 1 - assert outcome.get("reason") == f"Sampled:{rules[0]['id']}" + assert outcome.get("reason") == "Sampled:0" def test_it_does_not_sample_error(mini_sentry, relay): @@ -392,7 +392,7 @@ def test_sample_on_parametrized_root_transaction(mini_sentry, relay): relay.send_envelope(project_id, envelope) outcome = mini_sentry.captured_outcomes.get(timeout=2) - assert outcome["outcomes"][0]["reason"] == "Sampled:1" + assert outcome["outcomes"][0]["reason"] == "Sampled:0" def test_it_keeps_events(mini_sentry, relay): diff --git a/tests/integration/test_outcome.py b/tests/integration/test_outcome.py index b7435b490b..711e675f5a 100644 --- a/tests/integration/test_outcome.py +++ b/tests/integration/test_outcome.py @@ -798,7 +798,7 @@ def test_outcome_to_client_report(relay, mini_sentry): "version": 2, "rules": [ { - "id": 1, + "id": 3001, "samplingValue": {"type": "sampleRate", "value": 0.0}, "type": "transaction", "condition": { @@ -853,7 +853,7 @@ def test_outcome_to_client_report(relay, mini_sentry): "project_id": 42, "key_id": 123, "outcome": 1, - "reason": "Sampled:1", + "reason": "Sampled:3000", "category": 9, "quantity": 1, } @@ -960,7 +960,7 @@ def test_outcomes_aggregate_dynamic_sampling(relay, mini_sentry): "version": 2, "rules": [ { - "id": 1, + "id": 3001, "samplingValue": {"type": "sampleRate", "value": 0.0}, "type": "transaction", "condition": { @@ -1006,7 +1006,7 @@ def test_outcomes_aggregate_dynamic_sampling(relay, mini_sentry): "project_id": 42, "key_id": 123, "outcome": 1, - "reason": "Sampled:1", + "reason": "Sampled:3000", "category": 9, "quantity": 2, } @@ -1069,7 +1069,7 @@ def test_graceful_shutdown(relay, mini_sentry): "version": 2, "rules": [ { - "id": 1, + "id": 3001, "samplingValue": {"type": "sampleRate", "value": 0.0}, "type": "transaction", "condition": { @@ -1120,7 +1120,7 @@ def test_graceful_shutdown(relay, mini_sentry): "project_id": 42, "key_id": 123, "outcome": 1, - "reason": "Sampled:1", + "reason": "Sampled:3000", "category": 9, "quantity": 1, } @@ -1157,7 +1157,7 @@ def test_profile_outcomes( "version": 2, "rules": [ { - "id": 1, + "id": 3001, "samplingValue": {"type": "sampleRate", "value": 0.0}, "type": "transaction", "condition": { @@ -1242,7 +1242,7 @@ def make_envelope(transaction_name): "outcome": 1, "project_id": 42, "quantity": 6, # len(b"foobar") - "reason": "Sampled:1", + "reason": "Sampled:3000", "source": expected_source, }, { @@ -1252,7 +1252,7 @@ def make_envelope(transaction_name): "outcome": 1, # Filtered "project_id": 42, "quantity": 1, - "reason": "Sampled:1", + "reason": "Sampled:3000", "source": expected_source, }, { @@ -1262,7 +1262,7 @@ def make_envelope(transaction_name): "outcome": 1, # Filtered "project_id": 42, "quantity": 1, - "reason": "Sampled:1", + "reason": "Sampled:3000", "source": expected_source, }, ] @@ -1751,7 +1751,7 @@ def test_span_outcomes( "version": 2, "rules": [ { - "id": 1, + "id": 3001, "samplingValue": {"type": "sampleRate", "value": 0.0}, "type": "transaction", "condition": { @@ -1827,7 +1827,7 @@ def make_envelope(transaction_name): "outcome": 1, # Filtered "project_id": 42, "quantity": 1, - "reason": "Sampled:1", + "reason": "Sampled:3000", "source": expected_source, }, { diff --git a/tests/integration/test_spans.py b/tests/integration/test_spans.py index 1e8b682440..5a6b703e66 100644 --- a/tests/integration/test_spans.py +++ b/tests/integration/test_spans.py @@ -1788,7 +1788,7 @@ def test_dynamic_sampling( "version": 2, "rules": [ { - "id": 1, + "id": 3001, "samplingValue": {"type": "sampleRate", "value": sample_rate}, "type": "trace", "condition": { @@ -1847,4 +1847,7 @@ def summarize_outcomes(outcomes): else: outcomes = outcomes_consumer.get_outcomes(timeout=10) assert summarize_outcomes(outcomes) == {(12, 1): 4} # Span, Filtered - assert {o["reason"] for o in outcomes} == {"Sampled:1"} + assert {o["reason"] for o in outcomes} == {"Sampled:3000"} + + spans_consumer.assert_empty() + outcomes_consumer.assert_empty()