Skip to content

Commit

Permalink
#0: Update bw and latency test/scripts
Browse files Browse the repository at this point in the history
Sweep mcast
Make it easier to re-generate these numbers
  • Loading branch information
pgkeller committed Nov 5, 2024
1 parent 0b39880 commit e7e4517
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 96 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@
$j = 0;
}

if ($line =~ /per iteration/) {
# output line from test_pgm_dispatch
if ($line =~ /us per iteration/) {
my @parts = split(' ', $line);
my $us = $parts[8];
my $index = index($parts[8], ".");
Expand All @@ -32,6 +33,22 @@
$data->[$j][$i] = $us;
$j++;
}

# output line from test_bw_and_latency
if ($line =~ /BW:/) {
my @parts = split(' ', $line);
my $bw = $parts[7];
$data->[$j][$i] = $bw;
$j++;
}

# output latency from test_bw_and_latency
if ($line =~ /Latency:/) {
my @parts = split(' ', $line);
my $bw = $parts[7];
$data->[$j][$i] = $bw;
$j++;
}
}

for (my $y = 0; $y < $maxj; $y++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ void kernel_main() {
uint32_t read_ptr = cb_addr;
uint32_t write_ptr = cb_addr;
for (int j = 0; j < PAGE_COUNT; j++) {

#if DRAM_BANKED
uint64_t noc_addr = get_dram_noc_addr(j, page_size, 0);
#else
Expand All @@ -24,25 +23,41 @@ void kernel_main() {

#if ISSUE_MCAST
uint64_t dst_noc_multicast_addr =
get_noc_multicast_addr(NOC_ADDR_X, NOC_ADDR_Y, MCAST_NOC_END_ADDR_X, MCAST_NOC_END_ADDR_Y, NOC_MEM_ADDR);
noc_async_write_multicast(write_ptr, dst_noc_multicast_addr, page_size, NUM_MCAST_DESTS);
get_noc_multicast_addr(NOC_ADDR_X, NOC_ADDR_Y, MCAST_NOC_END_ADDR_X, MCAST_NOC_END_ADDR_Y, write_ptr);
noc_async_write_multicast(read_ptr, dst_noc_multicast_addr, page_size, NUM_MCAST_DESTS, LINKED);
#elif WRITE
uint64_t noc_write_addr = NOC_XY_ADDR(NOC_X(NOC_ADDR_X), NOC_Y(NOC_ADDR_Y), write_ptr);
noc_async_write(NOC_MEM_ADDR, noc_write_addr, page_size);
#elif READ_ONE_PACKET
noc_async_read_one_packet(noc_addr, read_ptr, page_size);
#else
noc_async_read(noc_addr, read_ptr, page_size);
#endif

#if LATENCY
noc_async_read_barrier();
#if WRITE
// dst_noc_multicast_addr is only declared in the ISSUE_MCAST branch above, so
// the closing multicast (last argument false) must not be compiled for
// unicast writes even when LINKED is set.
#if LINKED && ISSUE_MCAST
noc_async_write_multicast(cb_addr, dst_noc_multicast_addr, page_size, NUM_MCAST_DESTS, false);
#endif
noc_async_write_barrier();
#else
noc_async_read_barrier();
#endif
#endif

read_ptr += page_size;
write_ptr += page_size;
}
}
#if !LATENCY
noc_async_read_barrier();
#if WRITE
// Only issue the closing multicast (last argument false) when the loop above
// actually issued multicasts; LINKED alone can be set on a unicast-write run.
#if LINKED && ISSUE_MCAST
uint64_t dst_noc_multicast_addr =
get_noc_multicast_addr(NOC_ADDR_X, NOC_ADDR_Y, MCAST_NOC_END_ADDR_X, MCAST_NOC_END_ADDR_Y, cb_addr);
noc_async_write_multicast(cb_addr, dst_noc_multicast_addr, page_size, NUM_MCAST_DESTS, false);
#endif
noc_async_write_barrier();
#else
noc_async_read_barrier();
#endif
#endif
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#!/bin/bash

# Run this test w/
#   sweep_bw_and_latency.sh 2>&1 | tee log
# then run
#   filt_pgm_dispatch.pl
# then paste the results into the BW spreadsheet

# Pick max_x / max_y from the ARCH_NAME environment variable. They are used
# below as the multicast end core (-tx / -ty) and as the modulus base for the
# "halfway away" core computation. An unrecognized ARCH_NAME aborts the script.
if [ "$ARCH_NAME" = "grayskull" ]; then
    echo "Configured core range for grayskull"
    max_x=11
    max_y=8
elif [ "$ARCH_NAME" = "wormhole_b0" ]; then
    echo "Configured core range for wormhole_b0"
    max_x=7
    max_y=6
elif [ "$ARCH_NAME" = "blackhole" ]; then
    echo "Configured core range for blackhole"
    max_x=12
    max_y=9
else
    echo "Unknown arch: $ARCH_NAME"
    exit 1
fi

# Print ($1 + (max_x + 1) / 2) modulo (max_x + 1), i.e. the X coordinate
# offset from $1 by half of (max_x + 1), with wrap-around.
# The result is also stored in half_way_away_core_x.
function get_half_way_away_core_x() {
    local span=$(( max_x + 1 ))
    half_way_away_core_x=$(( ($1 + span / 2) % span ))
    echo "$half_way_away_core_x"
}

# Print ($1 + (max_y + 1) / 2) modulo (max_y + 1), i.e. the Y coordinate
# offset from $1 by half of (max_y + 1), with wrap-around.
# The result is also stored in half_way_away_core_y.
function get_half_way_away_core_y() {
    local span=$(( max_y + 1 ))
    half_way_away_core_y=$(( ($1 + span / 2) % span ))
    echo "$half_way_away_core_y"
}

# Core coordinates "halfway away" from core (0, 0); used by the far/halfway
# sweeps below as -sx/-sy (and -tx/-ty for multicast).
hx="$(get_half_way_away_core_x 0)"
hy="$(get_half_way_away_core_y 0)"

# Echo the argument list, then invoke the test_bw_and_latency binary with it.
# NOTE: $@ is intentionally left unquoted on the command line: callers pass
# several options packed into one string (e.g. "-m 2 -rx 0") and rely on
# word splitting to separate them.
function run_one() {
    echo "Running $*"
    build/test/tt_metal/perf_microbenchmark/dispatch/test_bw_and_latency $@
}

# Run the bandwidth sweep: one run_one invocation per (-bs, -p) pair listed
# below, in order, each followed by the caller's extra arguments.
function bw_test() {
    local pair
    # Each entry is "<-bs value> <-p value>".
    for pair in \
        "8 16" "8 32" "16 64" "32 128" "64 256" "128 512" \
        "256 1024" "256 2048" "256 4096" "256 8192" \
        "256 16384" "256 32768" "256 65536"
    do
        # $@ stays unquoted so packed option strings are word-split.
        run_one -bs "${pair% *}" -p "${pair#* }" $@
    done
}

# Run a single run_one invocation with the fixed options "-bs 8 -p 16 -l"
# followed by the caller's extra arguments.
function latency_test() {
    local -a fixed_opts=(-bs 8 -p 16 -l)
    # $@ stays unquoted so packed option strings are word-split.
    run_one "${fixed_opts[@]}" $@
}

# Sweep table: each entry is "<section label>|<options passed to the test>".
# For every entry a "### <label>" marker is printed, then the bandwidth sweep
# and the latency run are executed with the same options.
sweeps=(
    "read pcie|-m 0"
    "read dram|-m 1"
    "read drams|-m 3"
    "read l1 adjacent|-m 2"
    "read l1 far halfway away|-m 2 -rx 0 -ry 0 -sx $hx -sy $hy"
    "read local|-m 2 -rx 0"
    "write l1 far halfway away|-m 2 -rx 0 -ry 0 -sx $hx -sy $hy -wr"
    "mcast write to adjacent|-m 6 -rx 0 -ry 0 -sx 1 -sy 0 -tx 1 -ty 0"
    "mcast write to halfway away|-m 6 -rx 0 -ry 0 -sx $hx -sy $hy -tx $hx -ty $hy"
    "mcast write to all|-m 6 -rx 0 -ry 0 -sx 0 -sy 1 -tx $max_x -ty $max_y"
    "mcast write to all, linked|-m 6 -rx 0 -ry 0 -sx 0 -sy 1 -tx $max_x -ty $max_y -link"
)

for entry in "${sweeps[@]}"; do
    echo "###" "${entry%%|*}"
    # The options are passed as ONE string; bw_test/latency_test split them.
    bw_test "${entry#*|}"
    latency_test "${entry#*|}"
done

echo "###" done
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ bool page_size_as_runtime_arg_g; // useful particularly on GS multi-dram tests (
bool hammer_write_reg_g = false;
bool hammer_pcie_g = false;
bool hammer_pcie_type_g = false;
bool test_write = false;
bool linked = false;

void init(int argc, char **argv) {
std::vector<std::string> input_args(argv, argv + argc);
Expand All @@ -67,10 +69,12 @@ void init(int argc, char **argv) {
log_info(LogTest, " -sy: when reading from L1, Y of core to read from. when issuing a multicast write, Y of start core to write to. (default {})", 0);
log_info(LogTest, " -tx: when issuing a multicast write, X of end core to write to (default {})", 0);
log_info(LogTest, " -ty: when issuing a multicast write, Y of end core to write to (default {})", 0);
log_info(LogTest, " -wr: issue unicast write instead of read (default false)");
log_info(LogTest, " -c: when reading from dram, DRAM channel (default 0)");
log_info(LogTest, " -f: time just the finish call (use w/ lazy mode) (default disabled)");
log_info(LogTest, " -o: use read_one_packet API. restricts page size to 8K max (default {})", 0);
log_info(LogTest, " -z: enable dispatch lazy mode (default disabled)");
log_info(LogTest, "-link: link mcast transactions");
log_info(LogTest, " -hr: hammer write_reg while executing (for PCIe test)");
log_info(LogTest, " -hp: hammer hugepage PCIe memory while executing (for PCIe test)");
log_info(LogTest, " -hpt:hammer hugepage PCIe hammer type: 0:32bit writes 1:128bit non-temporal writes");
Expand Down Expand Up @@ -104,6 +108,14 @@ void init(int argc, char **argv) {
}
page_count_g = size_bytes / page_size_g;

test_write = test_args::has_command_option(input_args, "-wr");
if (test_write && (source_mem_g != 2 && source_mem_g != 6)) {
log_info(LogTest, "Writing only tested w/ L1 destination\n");
exit(-1);
}

linked = test_args::has_command_option(input_args, "-link");

worker_g = CoreRange({core_x, core_y}, {core_x, core_y});
src_worker_g = {src_core_x, src_core_y};

Expand Down Expand Up @@ -200,7 +212,7 @@ int main(int argc, char **argv) {
break;
case 2:
{
src_mem = "FROM_L1";
src_mem = test_write ? "TO_L1" : "FROM_L1";
CoreCoord w = device->physical_core_from_logical_core(src_worker_g, CoreType::WORKER);
noc_addr_x = w.x;
noc_addr_y = w.y;
Expand Down Expand Up @@ -245,6 +257,7 @@ int main(int argc, char **argv) {
noc_addr_y = start.y;
mcast_noc_addr_end_x = end.x;
mcast_noc_addr_end_y = end.y;
test_write = true;
}
break;
}
Expand All @@ -259,6 +272,8 @@ int main(int argc, char **argv) {
{"READ_ONE_PACKET", std::to_string(read_one_packet_g)},
{"DRAM_BANKED", std::to_string(dram_banked)},
{"ISSUE_MCAST", std::to_string(issue_mcast)},
{"WRITE", std::to_string(test_write)},
{"LINKED", std::to_string(linked)},
{"NUM_MCAST_DESTS", std::to_string(num_mcast_dests)},
{"MCAST_NOC_END_ADDR_X", std::to_string(mcast_noc_addr_end_x)},
{"MCAST_NOC_END_ADDR_Y", std::to_string(mcast_noc_addr_end_y)}
Expand All @@ -284,27 +299,29 @@ int main(int argc, char **argv) {

CoreCoord w = device->physical_core_from_logical_core(worker_g.start_coord, CoreType::WORKER);
log_info(LogTest, "Master core: {}", w.str());
string direction = test_write ? "Writing" : "Reading";
if (source_mem_g == 3) {
log_info(LogTest, "Reading: {}", src_mem);
log_info(LogTest, "{}: {}", direction, src_mem);
} else if (source_mem_g == 4) {
log_info(LogTest, "Reading: {} - core ({}, {})", src_mem, w.x, w.y);
log_info(LogTest, "{}: {} - core ({}, {})", direction, src_mem, w.x, w.y);
} else if (source_mem_g == 5) {
log_info(LogTest, "Writing: {} - core ({}, {})", src_mem, w.x, w.y);
// Mode 5 was logged with a fixed "Writing" label before `direction` was
// introduced. Passing the bool test_write here would print "true"/"false"
// instead of a direction, so keep the fixed label.
log_info(LogTest, "Writing: {} - core ({}, {})", src_mem, w.x, w.y);
} else if (source_mem_g == 6) {
log_info(LogTest, "Writing: {} - core grid [({}, {}) - ({}, {})]", src_mem, noc_addr_x, noc_addr_y, mcast_noc_addr_end_x, mcast_noc_addr_end_y);
// One placeholder per argument: direction, memory name, then the start and
// end coordinates of the multicast grid.
log_info(LogTest, "{}: {} - core grid [({}, {}) - ({}, {})]", direction, src_mem, noc_addr_x, noc_addr_y, mcast_noc_addr_end_x, mcast_noc_addr_end_y);
} else {
log_info(LogTest, "Reading: {} - core ({}, {})", src_mem, noc_addr_x, noc_addr_y);
log_info(LogTest, "{}: {} - core ({}, {})", direction, src_mem, noc_addr_x, noc_addr_y);
}
if (source_mem_g < 4 || source_mem_g == 6) {
std::string api;
string read_write = test_write ? "write" : "read";
if (issue_mcast) {
api = "noc_async_write_multicast";
api = "noc_async_" + read_write + "_multicast";
}
else if (read_one_packet_g) {
api = "noc_async_read_one_packet";
api = "noc_async_" + read_write + "_one_packet";
}
else {
api = "noc_async_read";
api = "noc_async_" + read_write;
}
log_info(LogTest, "Using API: {}", api);
log_info(LogTest, "Lazy: {}", lazy_g);
Expand Down

0 comments on commit e7e4517

Please sign in to comment.