Skip to content

Commit

Permalink
ref(ds): Low cardinality outcome reason (#3623)
Browse files Browse the repository at this point in the history
Reduce noise in outcome reasons by mapping ranges of dynamic sampling
rule IDs together, and ordering them. That is

```python
"123,1004,1500,1403,1404,1000"
# becomes
"1000,1004,1400,1500,?"
```
  • Loading branch information
jjbayer authored May 21, 2024
1 parent 692583e commit 3aa4991
Show file tree
Hide file tree
Showing 8 changed files with 164 additions and 29 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

**Internal**:

- Map outcome reasons for dynamic sampling to reduced set of values. ([#3623](https://github.com/getsentry/relay/pull/3623))

## 24.5.0

**Breaking Changes**:
Expand Down
98 changes: 96 additions & 2 deletions relay-server/src/services/outcome.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
//! pipeline, outcomes may not be emitted if the item is accepted.

use std::borrow::Cow;
use std::collections::BTreeMap;
use std::collections::{BTreeMap, BTreeSet};
use std::error::Error;
use std::net::IpAddr;
use std::sync::Arc;
Expand All @@ -24,6 +24,7 @@ use relay_filter::FilterStatKey;
#[cfg(feature = "processing")]
use relay_kafka::{ClientError, KafkaClient, KafkaTopic};
use relay_quotas::{DataCategory, ReasonCode, Scoping};
use relay_sampling::config::RuleId;
use relay_sampling::evaluation::MatchedRuleIds;
use relay_statsd::metric;
use relay_system::{Addr, FromMessage, Interface, NoResponse, Service};
Expand Down Expand Up @@ -164,7 +165,7 @@ pub enum Outcome {
Filtered(FilterStatKey),

/// The event has been filtered by a Sampling Rule
FilteredSampling(MatchedRuleIds),
FilteredSampling(RuleCategories),

/// The event has been rate limited.
RateLimited(Option<ReasonCode>),
Expand Down Expand Up @@ -251,6 +252,79 @@ impl fmt::Display for Outcome {
}
}

/// A lower-cardinality version of [`RuleId`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RuleCategory {
BoostLowVolumeProjects,
BoostEnvironments,
IgnoreHealthChecks,
BoostKeyTransactions,
Recalibration,
BoostReplayId,
BoostLowVolumeTransactions,
BoostLatestReleases,
Custom,
Other,
}

impl RuleCategory {
fn as_str(&self) -> &'static str {
match self {
Self::BoostLowVolumeProjects => "1000",
Self::BoostEnvironments => "1001",
Self::IgnoreHealthChecks => "1002",
Self::BoostKeyTransactions => "1003",
Self::Recalibration => "1004",
Self::BoostReplayId => "1005",
Self::BoostLowVolumeTransactions => "1400",
Self::BoostLatestReleases => "1500",
Self::Custom => "3000",
Self::Other => "0",
}
}
}

impl From<RuleId> for RuleCategory {
fn from(value: RuleId) -> Self {
match value.0 {
1000 => Self::BoostLowVolumeProjects,
1001 => Self::BoostEnvironments,
1002 => Self::IgnoreHealthChecks,
1003 => Self::BoostKeyTransactions,
1004 => Self::Recalibration,
1005 => Self::BoostReplayId,
1400..=1499 => Self::BoostLowVolumeTransactions,
1500..=1599 => Self::BoostLatestReleases,
3000..=4999 => Self::Custom,
_ => Self::Other,
}
}
}

/// An ordered set of categories that can be used as outcome reason.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct RuleCategories(pub BTreeSet<RuleCategory>);

impl fmt::Display for RuleCategories {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for (i, c) in self.0.iter().enumerate() {
if i > 0 {
write!(f, ",")?;
}
write!(f, "{}", c.as_str())?;
}
Ok(())
}
}

impl From<MatchedRuleIds> for RuleCategories {
fn from(value: MatchedRuleIds) -> Self {
RuleCategories(BTreeSet::from_iter(
value.0.into_iter().map(RuleCategory::from),
))
}
}

/// Reason for a discarded invalid event.
///
/// Used in `Outcome::Invalid`. Synchronize overlap with Sentry.
Expand Down Expand Up @@ -971,3 +1045,23 @@ impl Service for OutcomeProducerService {
});
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn rule_category_roundtrip() {
let input = "123,1004,1500,1403,1403,1404,1000";
let rule_ids = MatchedRuleIds::parse(input).unwrap();
let rule_categories = RuleCategories::from(rule_ids);

let serialized = rule_categories.to_string();
assert_eq!(&serialized, "1000,1004,1400,1500,0");

assert_eq!(
MatchedRuleIds::parse(&serialized).unwrap(),
MatchedRuleIds([1000, 1004, 1400, 1500, 0].map(RuleId).into())
);
}
}
2 changes: 1 addition & 1 deletion relay-server/src/services/processor/dynamic_sampling.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ pub fn sample_envelope_items(
let unsampled_profiles_enabled = forward_unsampled_profiles(state, global_config);

let matched_rules = sampling_match.into_matched_rules();
let outcome = Outcome::FilteredSampling(matched_rules.clone());
let outcome = Outcome::FilteredSampling(matched_rules.into());
state.managed_envelope.retain_items(|item| {
if unsampled_profiles_enabled && item.ty() == &ItemType::Profile {
item.set_sampled(false);
Expand Down
46 changes: 39 additions & 7 deletions relay-server/src/services/processor/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use relay_sampling::evaluation::MatchedRuleIds;
use relay_system::Addr;

use crate::envelope::{ContentType, ItemType};
use crate::services::outcome::{Outcome, TrackOutcome};
use crate::services::outcome::{Outcome, RuleCategories, TrackOutcome};
use crate::services::processor::{ClientReportGroup, ProcessEnvelopeState, MINIMUM_CLOCK_DRIFT};
use crate::utils::ItemAction;

Expand Down Expand Up @@ -240,6 +240,7 @@ fn outcome_from_parts(field: ClientReportField, reason: &str) -> Result<Outcome,
match field {
ClientReportField::FilteredSampling => match reason.strip_prefix("Sampled:") {
Some(rule_ids) => MatchedRuleIds::parse(rule_ids)
.map(RuleCategories::from)
.map(Outcome::FilteredSampling)
.map_err(|_| ()),
None => Err(()),
Expand All @@ -261,11 +262,11 @@ mod tests {
use std::sync::Arc;

use relay_event_schema::protocol::EventId;
use relay_sampling::config::RuleId;
use relay_sampling::evaluation::ReservoirCounters;

use crate::envelope::{Envelope, Item};
use crate::extractors::RequestMeta;
use crate::services::outcome::RuleCategory;
use crate::services::processor::{ProcessEnvelope, ProcessingGroup};
use crate::services::project::ProjectState;
use crate::testutils::{self, create_test_processor};
Expand Down Expand Up @@ -559,15 +560,46 @@ mod tests {

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123,456"),
Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![
RuleId(123),
RuleId(456),
])))
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::Other].into()
)))
);

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"),
Ok(Outcome::FilteredSampling(MatchedRuleIds(vec![RuleId(123)])))
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::Other].into()
)))
);

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:123"),
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::Other].into()
)))
);

assert_eq!(
outcome_from_parts(ClientReportField::FilteredSampling, "Sampled:1001"),
Ok(Outcome::FilteredSampling(RuleCategories(
[RuleCategory::BoostEnvironments].into()
)))
);

assert_eq!(
outcome_from_parts(
ClientReportField::FilteredSampling,
"Sampled:1001,1456,1567,3333,4444"
),
Ok(Outcome::FilteredSampling(RuleCategories(
[
RuleCategory::BoostEnvironments,
RuleCategory::BoostLowVolumeTransactions,
RuleCategory::BoostLatestReleases,
RuleCategory::Custom
]
.into()
)))
);
}

Expand Down
4 changes: 2 additions & 2 deletions relay-server/src/services/processor/span/processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use relay_spans::{otel_to_sentry_span, otel_trace::Span as OtelSpan};

use crate::envelope::{ContentType, Envelope, Item, ItemType};
use crate::metrics_extraction::generic::extract_metrics;
use crate::services::outcome::{DiscardReason, Outcome};
use crate::services::outcome::{DiscardReason, Outcome, RuleCategories};
use crate::services::processor::span::extract_transaction_span;
use crate::services::processor::{
dynamic_sampling, Addrs, ProcessEnvelope, ProcessEnvelopeState, ProcessingError,
Expand Down Expand Up @@ -53,7 +53,7 @@ pub fn process(
// once for all spans in the envelope.
let sampling_outcome = match dynamic_sampling::run(state, &config) {
SamplingResult::Match(sampling_match) if sampling_match.should_drop() => Some(
Outcome::FilteredSampling(sampling_match.into_matched_rules()),
Outcome::FilteredSampling(RuleCategories::from(sampling_match.into_matched_rules())),
),
_ => None,
};
Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_dynamic_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def test_it_removes_events(mini_sentry, relay):
public_key = config["publicKeys"][0]["publicKey"]

# add a sampling rule to project config that removes all transactions (sample_rate=0)
rules = _add_sampling_config(config, sample_rate=0, rule_type="transaction")
_add_sampling_config(config, sample_rate=0, rule_type="transaction")

# create an envelope with a trace context that is initiated by this project (for simplicity)
envelope, trace_id, event_id = _create_transaction_envelope(public_key)
Expand All @@ -253,7 +253,7 @@ def test_it_removes_events(mini_sentry, relay):
assert outcomes is not None
outcome = outcomes["outcomes"][0]
assert outcome.get("outcome") == 1
assert outcome.get("reason") == f"Sampled:{rules[0]['id']}"
assert outcome.get("reason") == "Sampled:0"


def test_it_does_not_sample_error(mini_sentry, relay):
Expand Down Expand Up @@ -392,7 +392,7 @@ def test_sample_on_parametrized_root_transaction(mini_sentry, relay):
relay.send_envelope(project_id, envelope)

outcome = mini_sentry.captured_outcomes.get(timeout=2)
assert outcome["outcomes"][0]["reason"] == "Sampled:1"
assert outcome["outcomes"][0]["reason"] == "Sampled:0"


def test_it_keeps_events(mini_sentry, relay):
Expand Down
24 changes: 12 additions & 12 deletions tests/integration/test_outcome.py
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,7 @@ def test_outcome_to_client_report(relay, mini_sentry):
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -853,7 +853,7 @@ def test_outcome_to_client_report(relay, mini_sentry):
"project_id": 42,
"key_id": 123,
"outcome": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"category": 9,
"quantity": 1,
}
Expand Down Expand Up @@ -960,7 +960,7 @@ def test_outcomes_aggregate_dynamic_sampling(relay, mini_sentry):
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1006,7 +1006,7 @@ def test_outcomes_aggregate_dynamic_sampling(relay, mini_sentry):
"project_id": 42,
"key_id": 123,
"outcome": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"category": 9,
"quantity": 2,
}
Expand Down Expand Up @@ -1069,7 +1069,7 @@ def test_graceful_shutdown(relay, mini_sentry):
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1120,7 +1120,7 @@ def test_graceful_shutdown(relay, mini_sentry):
"project_id": 42,
"key_id": 123,
"outcome": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"category": 9,
"quantity": 1,
}
Expand Down Expand Up @@ -1157,7 +1157,7 @@ def test_profile_outcomes(
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1242,7 +1242,7 @@ def make_envelope(transaction_name):
"outcome": 1,
"project_id": 42,
"quantity": 6, # len(b"foobar")
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
{
Expand All @@ -1252,7 +1252,7 @@ def make_envelope(transaction_name):
"outcome": 1, # Filtered
"project_id": 42,
"quantity": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
{
Expand All @@ -1262,7 +1262,7 @@ def make_envelope(transaction_name):
"outcome": 1, # Filtered
"project_id": 42,
"quantity": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
]
Expand Down Expand Up @@ -1751,7 +1751,7 @@ def test_span_outcomes(
"version": 2,
"rules": [
{
"id": 1,
"id": 3001,
"samplingValue": {"type": "sampleRate", "value": 0.0},
"type": "transaction",
"condition": {
Expand Down Expand Up @@ -1827,7 +1827,7 @@ def make_envelope(transaction_name):
"outcome": 1, # Filtered
"project_id": 42,
"quantity": 1,
"reason": "Sampled:1",
"reason": "Sampled:3000",
"source": expected_source,
},
{
Expand Down
Loading

0 comments on commit 3aa4991

Please sign in to comment.