diff --git a/ggml.c b/ggml.c
index 03ec5349b907fe..1a3f2a06fcb17c 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16559,22 +16559,14 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         } else {
            // wait for other threads to finish
             const int last = node_n;
-
-            const bool do_yield = last < 0 || cgraph->nodes[last]->op == GGML_OP_MUL_MAT;
-
-            while (true) {
-                // TODO: this sched_yield can have significant impact on the performance - either positive or negative
-                //       depending on the workload and the operating system.
-                //       since it is not clear what is the best approach, it should potentially become user-configurable
-                //       ref: https://github.com/ggerganov/ggml/issues/291
-                // UPD:  adding the do_yield flag seems to resolve the issue universally
-                if (do_yield) {
-                    sched_yield();
-                }
-
+            do {
+                #if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_METAL)
+                // Apple builds (Accelerate/Metal): busy-wait on node_n without calling sched_yield()
+                #else
+                sched_yield();
+                #endif
                 node_n = atomic_load(&state->shared->node_n);
-                if (node_n != last) break;
-            };
+            } while (node_n == last);
         }
 
         // check if we should stop