Skip to content

Commit

Permalink
refactor(mito): tidy memtable stats (#4982)
Browse files Browse the repository at this point in the history
* wip: share same WriteMetrics struct between different memtable implementations

* refactor: extract function to update memtable timestamp range
  • Loading branch information
v0y4g3r authored Nov 13, 2024
1 parent 3bbcde8 commit 6afc4e7
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 154 deletions.
1 change: 1 addition & 0 deletions src/mito2/src/memtable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use crate::sst::file::FileTimeRange;
pub mod bulk;
pub mod key_values;
pub mod partition_tree;
mod stats;
pub mod time_partition;
pub mod time_series;
pub(crate) mod version;
Expand Down
44 changes: 2 additions & 42 deletions src/mito2/src/memtable/partition_tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ pub(crate) mod data;
mod dedup;
mod dict;
mod merger;
mod metrics;
mod partition;
mod shard;
mod shard_builder;
Expand All @@ -38,8 +37,8 @@ use table::predicate::Predicate;
use crate::error::{Result, UnsupportedOperationSnafu};
use crate::flush::WriteBufferManagerRef;
use crate::memtable::key_values::KeyValue;
use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::partition_tree::tree::PartitionTree;
use crate::memtable::stats::WriteMetrics;
use crate::memtable::{
AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder,
MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats,
Expand Down Expand Up @@ -273,46 +272,7 @@ impl PartitionTreeMemtable {
fn update_stats(&self, metrics: &WriteMetrics) {
// Only let the tracker track value bytes.
self.alloc_tracker.on_allocation(metrics.value_bytes);

loop {
let current_min = self.min_timestamp.load(Ordering::Relaxed);
if metrics.min_ts >= current_min {
break;
}

let Err(updated) = self.min_timestamp.compare_exchange(
current_min,
metrics.min_ts,
Ordering::Relaxed,
Ordering::Relaxed,
) else {
break;
};

if updated == metrics.min_ts {
break;
}
}

loop {
let current_max = self.max_timestamp.load(Ordering::Relaxed);
if metrics.max_ts <= current_max {
break;
}

let Err(updated) = self.max_timestamp.compare_exchange(
current_max,
metrics.max_ts,
Ordering::Relaxed,
Ordering::Relaxed,
) else {
break;
};

if updated == metrics.max_ts {
break;
}
}
metrics.update_timestamp_range(&self.max_timestamp, &self.min_timestamp);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/memtable/partition_tree/dict.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ use std::sync::Arc;

use datatypes::arrow::array::{Array, ArrayBuilder, BinaryArray, BinaryBuilder};

use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::partition_tree::PkIndex;
use crate::memtable::stats::WriteMetrics;
use crate::metrics::MEMTABLE_DICT_BYTES;

/// Maximum keys in a [DictBlock].
Expand Down
38 changes: 0 additions & 38 deletions src/mito2/src/memtable/partition_tree/metrics.rs

This file was deleted.

2 changes: 1 addition & 1 deletion src/mito2/src/memtable/partition_tree/partition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ use crate::error::Result;
use crate::memtable::key_values::KeyValue;
use crate::memtable::partition_tree::data::{DataBatch, DataParts, DATA_INIT_CAP};
use crate::memtable::partition_tree::dedup::DedupReader;
use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::partition_tree::shard::{
BoxedDataBatchSource, Shard, ShardMerger, ShardNode, ShardSource,
};
use crate::memtable::partition_tree::shard_builder::ShardBuilder;
use crate::memtable::partition_tree::{PartitionTreeConfig, PkId};
use crate::memtable::stats::WriteMetrics;
use crate::metrics::PARTITION_TREE_READ_STAGE_ELAPSED;
use crate::read::{Batch, BatchBuilder};
use crate::row_converter::{McmpRowCodec, RowCodec};
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/memtable/partition_tree/shard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -428,8 +428,8 @@ mod tests {
use super::*;
use crate::memtable::partition_tree::data::timestamp_array_to_i64_slice;
use crate::memtable::partition_tree::dict::KeyDictBuilder;
use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::partition_tree::PkIndex;
use crate::memtable::stats::WriteMetrics;
use crate::memtable::KeyValues;
use crate::test_util::memtable_util::{
build_key_values_with_ts_seq_values, encode_keys, metadata_for_test,
Expand Down
3 changes: 1 addition & 2 deletions src/mito2/src/memtable/partition_tree/shard_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ use crate::memtable::partition_tree::data::{
DataBatch, DataBuffer, DataBufferReader, DataBufferReaderBuilder, DataParts, DATA_INIT_CAP,
};
use crate::memtable::partition_tree::dict::{DictBuilderReader, KeyDictBuilder};
use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::partition_tree::partition::PrimaryKeyFilter;
use crate::memtable::partition_tree::shard::Shard;
use crate::memtable::partition_tree::{PartitionTreeConfig, PkId, PkIndex, ShardId};
use crate::memtable::stats::WriteMetrics;
use crate::metrics::PARTITION_TREE_READ_STAGE_ELAPSED;

/// Builder to write keys and data to a shard that the key dictionary
Expand Down Expand Up @@ -318,7 +318,6 @@ mod tests {

use super::*;
use crate::memtable::partition_tree::data::timestamp_array_to_i64_slice;
use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::KeyValues;
use crate::test_util::memtable_util::{
build_key_values_with_ts_seq_values, encode_key_by_kv, metadata_for_test,
Expand Down
2 changes: 1 addition & 1 deletion src/mito2/src/memtable/partition_tree/tree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ use table::predicate::Predicate;
use crate::error::{PrimaryKeyLengthMismatchSnafu, Result, SerializeFieldSnafu};
use crate::flush::WriteBufferManagerRef;
use crate::memtable::key_values::KeyValue;
use crate::memtable::partition_tree::metrics::WriteMetrics;
use crate::memtable::partition_tree::partition::{
Partition, PartitionKey, PartitionReader, PartitionRef, ReadPartitionContext,
};
use crate::memtable::partition_tree::PartitionTreeConfig;
use crate::memtable::stats::WriteMetrics;
use crate::memtable::{BoxedBatchIterator, KeyValues};
use crate::metrics::{PARTITION_TREE_READ_STAGE_ELAPSED, READ_ROWS_TOTAL, READ_STAGE_ELAPSED};
use crate::read::dedup::LastNonNullIter;
Expand Down
85 changes: 85 additions & 0 deletions src/mito2/src/memtable/stats.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
// Copyright 2023 Greptime Team
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Internal metrics of the memtable.

use std::sync::atomic::{AtomicI64, Ordering};

/// Metrics of writing memtables.
///
/// The [`Default`] value carries an empty timestamp range (`min_ts == i64::MAX`,
/// `max_ts == i64::MIN`), so passing it to
/// [`WriteMetrics::update_timestamp_range`] leaves the target range untouched.
pub(crate) struct WriteMetrics {
    /// Size allocated by keys.
    pub(crate) key_bytes: usize,
    /// Size allocated by values.
    pub(crate) value_bytes: usize,
    /// Minimum timestamp.
    pub(crate) min_ts: i64,
    /// Maximum timestamp.
    pub(crate) max_ts: i64,
}

impl WriteMetrics {
    /// Widens a shared timestamp range so that it also covers `[self.min_ts, self.max_ts]`.
    ///
    /// `prev_min_ts` is lowered to `self.min_ts` when that value is smaller, and
    /// `prev_max_ts` is raised to `self.max_ts` when that value is larger. Neither bound
    /// is ever narrowed.
    ///
    /// Mind the argument order: the **maximum** comes first, the **minimum** second.
    ///
    /// Every atomic access uses [`Ordering::Relaxed`].
    pub(crate) fn update_timestamp_range(&self, prev_max_ts: &AtomicI64, prev_min_ts: &AtomicI64) {
        // Check with a plain load first: when the stored bound already covers this
        // write, no atomic read-modify-write is issued at all.
        if self.min_ts < prev_min_ts.load(Ordering::Relaxed) {
            // `fetch_min` re-checks atomically, so a concurrent writer that stored an
            // even smaller value in the meantime is never overwritten.
            prev_min_ts.fetch_min(self.min_ts, Ordering::Relaxed);
        }

        if self.max_ts > prev_max_ts.load(Ordering::Relaxed) {
            prev_max_ts.fetch_max(self.max_ts, Ordering::Relaxed);
        }
    }
}

impl Default for WriteMetrics {
    /// Returns metrics with zero byte counts and an empty timestamp range.
    fn default() -> Self {
        Self {
            key_bytes: 0,
            value_bytes: 0,
            // Inverted sentinels: any recorded timestamp is smaller than `i64::MAX` or
            // larger than `i64::MIN`, unless it equals the sentinel itself.
            min_ts: i64::MAX,
            max_ts: i64::MIN,
        }
    }
}
83 changes: 15 additions & 68 deletions src/mito2/src/memtable/time_series.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ use crate::error::{
};
use crate::flush::WriteBufferManagerRef;
use crate::memtable::key_values::KeyValue;
use crate::memtable::stats::WriteMetrics;
use crate::memtable::{
AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder,
MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats,
Expand Down Expand Up @@ -140,51 +141,13 @@ impl TimeSeriesMemtable {
}

/// Updates memtable stats.
fn update_stats(&self, stats: LocalStats) {
self.alloc_tracker.on_allocation(stats.allocated);

loop {
let current_min = self.min_timestamp.load(Ordering::Relaxed);
if stats.min_ts >= current_min {
break;
}

let Err(updated) = self.min_timestamp.compare_exchange(
current_min,
stats.min_ts,
Ordering::Relaxed,
Ordering::Relaxed,
) else {
break;
};

if updated == stats.min_ts {
break;
}
}

loop {
let current_max = self.max_timestamp.load(Ordering::Relaxed);
if stats.max_ts <= current_max {
break;
}

let Err(updated) = self.max_timestamp.compare_exchange(
current_max,
stats.max_ts,
Ordering::Relaxed,
Ordering::Relaxed,
) else {
break;
};

if updated == stats.max_ts {
break;
}
}
fn update_stats(&self, stats: WriteMetrics) {
self.alloc_tracker
.on_allocation(stats.key_bytes + stats.value_bytes);
stats.update_timestamp_range(&self.max_timestamp, &self.min_timestamp);
}

fn write_key_value(&self, kv: KeyValue, stats: &mut LocalStats) -> Result<()> {
fn write_key_value(&self, kv: KeyValue, stats: &mut WriteMetrics) -> Result<()> {
ensure!(
kv.num_primary_keys() == self.row_codec.num_fields(),
PrimaryKeyLengthMismatchSnafu {
Expand All @@ -195,9 +158,9 @@ impl TimeSeriesMemtable {
let primary_key_encoded = self.row_codec.encode(kv.primary_keys())?;
let fields = kv.fields().collect::<Vec<_>>();

stats.allocated += fields.iter().map(|v| v.data_size()).sum::<usize>();
stats.value_bytes += fields.iter().map(|v| v.data_size()).sum::<usize>();
let (series, series_allocated) = self.series_set.get_or_add_series(primary_key_encoded);
stats.allocated += series_allocated;
stats.key_bytes += series_allocated;

// safety: timestamp of kv must be both present and a valid timestamp value.
let ts = kv.timestamp().as_timestamp().unwrap().unwrap().value();
Expand All @@ -223,13 +186,13 @@ impl Memtable for TimeSeriesMemtable {
}

fn write(&self, kvs: &KeyValues) -> Result<()> {
let mut local_stats = LocalStats::default();
let mut local_stats = WriteMetrics::default();

for kv in kvs.iter() {
self.write_key_value(kv, &mut local_stats)?;
}
local_stats.allocated += kvs.num_rows() * std::mem::size_of::<Timestamp>();
local_stats.allocated += kvs.num_rows() * std::mem::size_of::<OpType>();
local_stats.value_bytes += kvs.num_rows() * std::mem::size_of::<Timestamp>();
local_stats.value_bytes += kvs.num_rows() * std::mem::size_of::<OpType>();

// TODO(hl): this may be inaccurate since the for-loop may return early.
// We may lift the primary key length check out of Memtable::write
Expand All @@ -241,11 +204,11 @@ impl Memtable for TimeSeriesMemtable {
}

fn write_one(&self, key_value: KeyValue) -> Result<()> {
let mut local_stats = LocalStats::default();
let res = self.write_key_value(key_value, &mut local_stats);
local_stats.allocated += std::mem::size_of::<Timestamp>() + std::mem::size_of::<OpType>();
let mut metrics = WriteMetrics::default();
let res = self.write_key_value(key_value, &mut metrics);
metrics.value_bytes += std::mem::size_of::<Timestamp>() + std::mem::size_of::<OpType>();

self.update_stats(local_stats);
self.update_stats(metrics);
self.num_rows.fetch_add(1, Ordering::Relaxed);
res
}
Expand Down Expand Up @@ -359,22 +322,6 @@ impl Memtable for TimeSeriesMemtable {
}
}

struct LocalStats {
allocated: usize,
min_ts: i64,
max_ts: i64,
}

impl Default for LocalStats {
fn default() -> Self {
LocalStats {
allocated: 0,
min_ts: i64::MAX,
max_ts: i64::MIN,
}
}
}

type SeriesRwLockMap = RwLock<BTreeMap<Vec<u8>, Arc<RwLock<Series>>>>;

#[derive(Clone)]
Expand Down

0 comments on commit 6afc4e7

Please sign in to comment.