diff --git a/ggml.c b/ggml.c index 03ec5349b907fe..1a3f2a06fcb17c 100644 --- a/ggml.c +++ b/ggml.c @@ -16559,22 +16559,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) { } else { // wait for other threads to finish const int last = node_n; - - const bool do_yield = last < 0 || cgraph->nodes[last]->op == GGML_OP_MUL_MAT; - - while (true) { - // TODO: this sched_yield can have significant impact on the performance - either positive or negative - // depending on the workload and the operating system. - // since it is not clear what is the best approach, it should potentially become user-configurable - // ref: https://github.com/ggerganov/ggml/issues/291 - // UPD: adding the do_yield flag seems to resolve the issue universally - if (do_yield) { - sched_yield(); - } - + do { + #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_METAL) + //apple does nothing + #else + sched_yield(); + #endif node_n = atomic_load(&state->shared->node_n); - if (node_n != last) break; - }; + } while (node_n == last); } // check if we should stop