-
Notifications
You must be signed in to change notification settings - Fork 0
/
latency.cpp
80 lines (69 loc) · 2.47 KB
/
latency.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
//
// SPDX-License-Identifier: MIT
// Copyright (c) 2019 Andriy Berestovskyy <berestovskyy@gmail.com>
//
// Applied Benchmarks: Memory Latency
// Benchmarking Kaby Lake and Haswell memory latency using lists
//
#include <benchmark/benchmark.h>
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>
// Shared 64-bit Mersenne Twister engine, seeded once from std::random_device
std::mt19937_64 m_generator(std::random_device{}());
// User-defined literals
// Byte-count literal: 64_B is 64. Written without whitespace between "" and
// the suffix, so _B is part of the literal-operator token instead of being a
// separate (reserved: underscore + uppercase) identifier.
constexpr unsigned long long operator""_B(unsigned long long n) { return n; }
// Kilobyte literal with 1 KB = 1024 bytes: 4_KB is 4096. Written without
// whitespace between "" and the suffix, so _KB is part of the
// literal-operator token instead of being a separate (reserved) identifier.
constexpr unsigned long long operator""_KB(unsigned long long n) {
  return n * 1024;
}
// Decimal "millions" literal: 1_M is 1000000. Written without whitespace
// between "" and the suffix, so _M is part of the literal-operator token
// instead of being a separate (reserved) identifier.
constexpr unsigned long long operator""_M(unsigned long long n) {
  return n * 1000 * 1000;
}
// Cache line size in bytes. 64 matches x86-64; for A64 ARMs the value would
// be 128 bytes.
constexpr unsigned long long kCachelineSize = 64_B;
// Memory page size in bytes. The default page size is 4 KB.
constexpr unsigned long long kPageSize = 4_KB;
// Singly linked list node with padding.
// The list is index-based: nodes are stored in a std::vector and each node
// names its successor by position rather than by pointer.
struct Element {
// Position (within the node vector) of the next node to visit
unsigned long index;
// Filler of kPageSize bytes, so sizeof(Element) exceeds one memory page.
// NOTE(review): presumably meant to keep neighbouring nodes on separate
// pages - confirm the intent against the benchmark's design.
std::byte padding[kPageSize];
};
//
// Benchmark memory latency using a list.
//
// The list is a contiguous std::vector<Element> whose nodes are linked by
// index into ONE random cycle: every load depends on the previous one, and
// every node is visited before the walk repeats. (Shuffling nodes that hold
// sequential indices would only produce a random permutation, which usually
// splits into several cycles; the walk could then be trapped in a short one.)
//
// Reported counters:
//   "Size"      - requested memory block size in bytes
//   "Nodes"     - number of list nodes (block size / cache line size)
//   "Read Rate" - iterations * cache line size, reported as a rate
//
// NOTE(review): each Element also carries kPageSize bytes of padding, so the
// vector itself occupies more memory than the "Size" counter reports.
//
// @param state.range(0)
//    Memory block size in KB to benchmark. Must be positive; otherwise the
//    run is skipped with an error.
//
static void memory_latency_list(benchmark::State &state) {
  const auto size_kb = state.range(0);
  if (size_kb <= 0) {
    // A negative value would wrap to a huge unsigned size below
    state.SkipWithError("memory block size must be a positive number of KB");
    return;
  }
  const auto mem_block_size =
      operator""_KB(static_cast<unsigned long long>(size_kb));
  // Each memory access fetches a cache line
  const auto num_nodes = mem_block_size / kCachelineSize;
  if (num_nodes == 0) {
    // Checked at run time: an assert would vanish in NDEBUG builds and the
    // walk below would then index into an empty vector
    state.SkipWithError("memory block is smaller than one cache line");
    return;
  }
  // Allocate a contiguous list of nodes for an iteration
  std::vector<Element> list(num_nodes);
  {
    // Random visiting order over all node positions. Shuffling the small
    // indices (not the padded nodes) keeps the setup cheap; the scratch
    // vector is released before the measurement starts.
    std::vector<unsigned long> order(num_nodes);
    std::iota(order.begin(), order.end(), 0UL);
    std::shuffle(order.begin(), order.end(), m_generator);
    // Chain each node to its successor in the visiting order...
    for (std::size_t i = 0; i + 1 < order.size(); ++i) {
      list[order[i]].index = order[i + 1];
    }
    // ...and close the loop so the walk never runs off the end
    list[order.back()].index = order.front();
  }
  const auto num_ops = 1_M;
  // The position is carried across batches: restarting from node 0 each time
  // would only ever touch the first num_ops nodes of a longer cycle
  auto index = 0UL;
  while (state.KeepRunningBatch(num_ops)) {
    // iterate over num_ops dependent loads
    for (auto remaining = num_ops; remaining > 0; --remaining) {
      // access the next node
      index = list[index].index;
    }
    // Keep the dependent chain observable so it is not optimised away
    benchmark::DoNotOptimize(index);
  }
  state.counters["Size"] =
      benchmark::Counter(mem_block_size, benchmark::Counter::kDefaults,
                         benchmark::Counter::OneK::kIs1024);
  state.counters["Nodes"] =
      benchmark::Counter(num_nodes, benchmark::Counter::kDefaults,
                         benchmark::Counter::OneK::kIs1024);
  state.counters["Read Rate"] = benchmark::Counter(
      state.iterations() * kCachelineSize, benchmark::Counter::kIsRate,
      benchmark::Counter::OneK::kIs1024);
}
// Register the benchmark with one argument, labelled "size KB" in the output.
// Range(1, 1 << 22) with RangeMultiplier(2) requests block sizes from 1 KB up
// to 1 << 22 KB, multiplying by 2 between runs. RangeMultiplier is set before
// Range so that the multiplier applies to the generated sizes.
BENCHMARK(memory_latency_list)
->ArgName("size KB")
->RangeMultiplier(2)
->Range(1, 1 << 22);
// Google Benchmark macro that provides the program's main()
BENCHMARK_MAIN();