Skip to content

Commit

Permalink
#0: Add prefetch_relay_paged_packed test regressing on L1 cache overf…
Browse files Browse the repository at this point in the history
…low (#10859)
  • Loading branch information
tt-asaigal authored Jul 29, 2024
1 parent 33373a2 commit 658d030
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 2 deletions.
7 changes: 7 additions & 0 deletions tests/scripts/run_cpp_fd2_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ run_test() {
echo
};

# Print the given command line, run it with TT_METAL_WATCHER=1 supplied as a
# prefix assignment on that command, then print a blank separator line.
#   $1 - the full command line as a single string. It is expanded unquoted on
#        purpose so the shell splits it into the program and its arguments.
run_test_with_watcher() {
echo $1
TT_METAL_WATCHER=1 $1
echo
};

run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 0 -i 5" # TrueSmoke Test
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 1 -i 5" # Smoke Test
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 2 -i 5" # Random Test
Expand All @@ -37,6 +43,7 @@ run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 5 -i 5" # Paged DRAM Write + Read Test
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 6 -i 5" # Host Test
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 7 -i 5" # Packed Read Test
run_test_with_watcher "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 7 -i 10 -x -mpps" # Packed Read Test w/ max num subcmds
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 1 -i 1000 -rb" # Smoke Test
run_test "./build/test/tt_metal/perf_microbenchmark/dispatch/test_prefetcher -t 2 -i 1000 -rb" # Random Test

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ bool split_prefetcher_g;
// Set in init() from the presence of the "-sdis" command-line option.
bool split_dispatcher_g;
uint32_t prefetch_d_buffer_size_g;
// Set in init() from the presence of "-x": execute commands from the DRAM exec_buf.
bool use_dram_exec_buf_g = false;
// Set in init() from the presence of "-mpps". When true, gen_packed_read_test uses
// CQ_PREFETCH_CMD_RELAY_PAGED_PACKED_MAX_SUB_CMDS sub-commands per packed read
// instead of a random count in [1, 7].
// NOTE(review): unlike the neighbouring globals this name has no "_g" suffix, and
// "submcds" looks like a misspelling of "subcmds" -- confirm before renaming, since
// every use site would have to change with it.
bool relay_max_packed_paged_submcds = false;
// Set in init() from "-xpls" (defaults to DRAM_EXEC_BUF_DEFAULT_LOG_PAGE_SIZE).
uint32_t exec_buf_log_page_size_g;

CoreCoord first_worker_g = { 0, 1 };
Expand Down Expand Up @@ -135,6 +136,7 @@ void init(int argc, char **argv) {
log_info(LogTest, " -packetized_en: packetized path enabled (default false)");
log_info(LogTest, " -device_id: Device on which the test will be run, default = 0");
log_info(LogTest, " -x: execute commands from dram exec_buf (default 0)");
log_info(LogTest, " -mpps: give prefetcher the maximum number of packed data submcds to relay to dispatcher");
log_info(LogTest, "-xpls: execute buffer log dram page size (default {})", DRAM_EXEC_BUF_DEFAULT_LOG_PAGE_SIZE);
log_info(LogTest, " -s: seed for randomized tests (default 1)");
exit(0);
Expand All @@ -160,6 +162,7 @@ void init(int argc, char **argv) {
split_prefetcher_g = test_args::has_command_option(input_args, "-spre");
split_dispatcher_g = test_args::has_command_option(input_args, "-sdis");
use_dram_exec_buf_g = test_args::has_command_option(input_args, "-x");
relay_max_packed_paged_submcds = test_args::has_command_option(input_args, "-mpps");
exec_buf_log_page_size_g = test_args::get_command_option_uint32(input_args, "-xpls", DRAM_EXEC_BUF_DEFAULT_LOG_PAGE_SIZE);

packetized_path_en_g = test_args::has_command_option(input_args, "-packetized_en");
Expand Down Expand Up @@ -882,7 +885,7 @@ void gen_packed_read_test(Device *device,
bool done = false;
while (!done) {
uint32_t packed_read_page_size = std::rand() % 3 + 9; // 512, 1024, 2048
uint32_t n_sub_cmds = (std::rand() % 7) + 1;
uint32_t n_sub_cmds = relay_max_packed_paged_submcds ? CQ_PREFETCH_CMD_RELAY_PAGED_PACKED_MAX_SUB_CMDS : (std::rand() % 7) + 1;

vector<uint32_t> lengths;
uint32_t total_length = 0;
Expand Down
8 changes: 7 additions & 1 deletion tt_metal/impl/dispatch/kernels/cq_prefetch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ constexpr uint32_t max_read_packed_cmd =
constexpr uint32_t l1_cache_elements = max_read_packed_cmd + 1; // +1 for sentinel value
constexpr uint32_t l1_cache_elements_rounded =
((l1_cache_elements + l1_to_local_cache_copy_chunk - 1) / l1_to_local_cache_copy_chunk) *
l1_to_local_cache_copy_chunk + (l1_to_local_cache_copy_chunk - 1);;
l1_to_local_cache_copy_chunk + (l1_to_local_cache_copy_chunk - 1);

static uint32_t l1_cache[l1_cache_elements_rounded];

Expand Down Expand Up @@ -775,6 +775,9 @@ uint32_t process_relay_paged_packed_cmd(uint32_t cmd_ptr,
}

uint32_t amt = sub_cmds_length / sizeof(uint32_t);
// Check that the final write does not overflow the L1 cache and corrupt the stack
// End address of final write is: curr_offset_into_cache + write_size_rounded_up_to_copy_chunk
ASSERT((uint32_t)(l1_cache_pos + ((amt + l1_to_local_cache_copy_chunk - 1) / l1_to_local_cache_copy_chunk) * l1_to_local_cache_copy_chunk - l1_cache) < l1_cache_elements_rounded);
careful_copy_from_l1_to_local_cache<l1_to_local_cache_copy_chunk, l1_cache_elements_rounded>((volatile uint32_t tt_l1_ptr *)(data_ptr), amt, l1_cache_pos);
// Store a non-zero sentinel value at the end to save a test/branch in the read path
((CQPrefetchRelayPagedPackedSubCmd *)&l1_cache_pos[amt])->length = 1;
Expand Down Expand Up @@ -1020,6 +1023,9 @@ static uint32_t process_exec_buf_relay_paged_packed_cmd(uint32_t& cmd_ptr,
}

uint32_t amt = sub_cmds_length / sizeof(uint32_t);
// Check that the final write does not overflow the L1 cache and corrupt the stack.
// End address of final write is: curr_offset_into_cache + write_size_rounded_up_to_copy_chunk
ASSERT((uint32_t)(l1_cache_pos + ((amt + l1_to_local_cache_copy_chunk - 1) / l1_to_local_cache_copy_chunk) * l1_to_local_cache_copy_chunk - l1_cache) < l1_cache_elements_rounded);
careful_copy_from_l1_to_local_cache<l1_to_local_cache_copy_chunk, l1_cache_elements_rounded>(l1_ptr, amt, l1_cache_pos);
// Store a non-zero sentinel value at the end to save a test/branch in the read path
((CQPrefetchRelayPagedPackedSubCmd *)&l1_cache_pos[amt])->length = 1;
Expand Down

0 comments on commit 658d030

Please sign in to comment.