Skip to content

Commit

Permalink
Merge branch 'mengfeil/optimize-test-workflow' into mengfeil/latest-test
Browse files Browse the repository at this point in the history
  • Loading branch information
mengfei25 committed Jul 10, 2024
2 parents 1c1c1b6 + 7283c75 commit 443fd35
Show file tree
Hide file tree
Showing 6 changed files with 77 additions and 40 deletions.
14 changes: 7 additions & 7 deletions .github/ci_expected_accuracy/check_expected.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,19 @@
passed_models.append([model_name, test_accuracy])
if refer_accuracy == "N/A":
new_models.append([model_name, test_accuracy])
refer_data.loc[refer_data.tail(1).index.tolist()[0] + 1,:] = "N/A"
refer_data.at[refer_data.tail(1).index, "name"] = model_name
refer_data.at[refer_data.tail(1).index, args.dtype] = test_accuracy
refer_data.loc[len(refer_data),:] = "N/A"
refer_data.at[len(refer_data) - 1, "name"] = model_name
refer_data.at[len(refer_data) - 1, args.dtype] = test_accuracy
elif 'pass' not in refer_accuracy:
new_pass_models.append([model_name, test_accuracy])
refer_data.at[refer_row[0], args.dtype] = test_accuracy
else:
if refer_accuracy == "N/A":
new_models.append([model_name, test_accuracy])
real_failed_models.append([model_name, test_accuracy])
refer_data.loc[refer_data.tail(1).index.tolist()[0] + 1,:] = "N/A"
refer_data.at[refer_data.tail(1).index, "name"] = model_name
refer_data.at[refer_data.tail(1).index, args.dtype] = test_accuracy
refer_data.loc[len(refer_data),:] = "N/A"
refer_data.at[len(refer_data) - 1, "name"] = model_name
refer_data.at[len(refer_data) - 1, args.dtype] = test_accuracy
elif "pass" in refer_accuracy:
real_failed_models.append([model_name, test_accuracy])
else:
Expand All @@ -80,7 +80,7 @@
print("Pass rate: {:.2f}%".format(len(passed_models) / len(model_names) * 100))

if len(new_pass_models + new_models) > 0:
print("NOTE: New models result, please update the reference", new_pass_models)
print("NOTE: New models result, please update the reference", new_pass_models, new_models)
if args.update:
refer_data.to_csv(refer_file, sep=',', encoding='utf-8', index=False)
print("Updated. Now, confirm the changes to .csvs and `git add` them if satisfied.")
64 changes: 45 additions & 19 deletions .github/scripts/apply_torch_pr.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

# check reverted PR is in current code base or not
def check_reverted_reopen(pr_info):
git_cmd = "git log nightly -n 1 2>&1 |grep 'nightly release' |head -1 |sed 's/.*(//;s/).*//' || git rev-parse HEAD"
git_cmd = "((git log -n 1 2>&1 |grep 'nightly release' |head -1 |sed 's/.*(//;s/).*//' || true) && git rev-parse HEAD) |head -n 1"
git_info = subprocess.Popen(git_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
main_commit = git_info.communicate()[0].decode("utf-8").replace("\n", "")
revert_cmd = "cur_cmt=$(git rev-parse HEAD) && git fetch origin main > /dev/null 2>&1 && " + \
Expand All @@ -40,6 +40,39 @@ def check_reverted_reopen(pr_info):
reverted = False
return reverted

def check_merged(pr_info):
    """Return True if this PR's merge marker is present in the commit the
    current checkout is based on, False otherwise."""
    # Resolve the base commit: prefer the commit id embedded in a
    # 'nightly release' log line; `git rev-parse HEAD` is the fallback.
    # `head -n 1` keeps whichever of the two lines was produced first.
    base_cmd = "((git log -n 1 2>&1 |grep 'nightly release' |head -1 |sed 's/.*(//;s/).*//' || true) && git rev-parse HEAD) |head -n 1"
    base_proc = subprocess.Popen(base_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    base_commit = base_proc.communicate()[0].decode("utf-8").replace("\n", "")
    # Temporarily check out that base commit, search its history for the
    # 'resolved: <pr url>' marker added on merge, then restore HEAD.
    search_cmd = (
        "cur_cmt=$(git rev-parse HEAD) && git fetch origin main > /dev/null 2>&1 && "
        "git checkout " + base_commit + " > /dev/null 2>&1 && "
        "git log |grep 'resolved: " + pr_info["html_url"] + "' || true && "
        "git checkout $cur_cmt > /dev/null 2>&1"
    )
    search_proc = subprocess.Popen(search_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    search_out = search_proc.communicate()[0].decode("utf-8")
    return ("resolved: " + pr_info["html_url"]) in search_out

def appyly_pr(pr_info, re_apply_msg):
    """Download a PR's diff and apply it onto the current checkout.

    NOTE(review): the name's typo ("appyly" vs "apply") is kept because
    existing callers in this script use it; renaming would break them.

    Args:
        pr_info: GitHub API pull-request object; only "diff_url" is read.
        re_apply_msg: short human-readable reason the PR is being applied,
            echoed in the status line.

    Side effects: writes then removes the diff file in the CWD, mutates the
    git working tree, and exits the process with status 1 if the apply fails.
    """
    # Fetch the raw diff to a local file named after the URL's last segment.
    pr_file = pr_info["diff_url"].split("/")[-1]
    urllib.request.urlretrieve(pr_info["diff_url"], pr_file)
    # Apply with a 3-way merge so minor context drift is tolerated; remove
    # the diff file only if the apply succeeded (&& chaining).
    apply_cmd = "git apply --3way " + pr_file + " && rm -f " + pr_file
    apply_info = subprocess.Popen(apply_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
    apply_message = apply_info.communicate()[0].decode("utf-8")
    apply_status = apply_info.returncode
    # apply status
    if apply_status == 0:
        print("{} {}, applied got SUCCESSFUL".format(pr_info["diff_url"], re_apply_msg))
    else:
        # Fix: mirror the SUCCESS message format (url + reason). Previously the
        # raw git output was interpolated into this slot and then printed a
        # second time on the next line.
        print("{} {}, applied got FAILED".format(pr_info["diff_url"], re_apply_msg))
        print(apply_status, apply_message)
        exit(1)


# headers = {'Authorization': 'Bearer ' + args.token} if args.token != None else args.token
pr_list = args.pr_list + args.extra_pr_list
Expand All @@ -53,32 +86,25 @@ def check_reverted_reopen(pr_info):
if pr_info["state"].lower() == "open":
# for reverted PR
reverted_id = next((item["id"] for item in pr_info["labels"] if item["name"] == "Reverted"), -1)
re_apply_msg = ""
re_apply_msg = "is opened"
if reverted_id != -1:
reverted = check_reverted_reopen(pr_info)
# skip if PR not reverted but re-open in current code base
if not reverted:
print("{} is re-open but not reverted, no need to apply".format(pr_info["diff_url"]))
continue
else:
re_apply_msg = "is re-opened & reverted,"
# get pr diff
pr_file = pr_info["diff_url"].split("/")[-1]
urllib.request.urlretrieve(pr_info["diff_url"], pr_file)
# apply diff
apply_cmd = "git apply --3way " + pr_file + " && rm -f " + pr_file
apply_info = subprocess.Popen(apply_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=True)
apply_message = apply_info.communicate()[0].decode("utf-8")
apply_status = apply_info.returncode
# apply status
if apply_status == 0:
print("{} {} applied got SUCCESSFUL".format(pr_info["diff_url"], re_apply_msg))
else:
print("{} applied got FAILED".format(pr_info["diff_url"]))
print(apply_status, apply_message)
exit(1)
re_apply_msg = "is re-opened and reverted,"
appyly_pr(pr_info, re_apply_msg)
elif pr_info["state"].lower() == "closed":
print("{} is ClOSED, no need to apply".format(pr_info["diff_url"]))
merged_id = next((item["id"] for item in pr_info["labels"] if item["name"] == "Merged"), -1)
re_apply_msg = "is closed but not merged"
if merged_id != -1:
merged = check_merged(pr_info)
if merged:
print("{} is closed and merged, no need to apply".format(pr_info["diff_url"]))
continue
appyly_pr(pr_info, re_apply_msg)
else:
print("{} is {}, no need to apply".format(pr_info["diff_url"], pr_info["state"]))
exit(1)
Expand Down
4 changes: 0 additions & 4 deletions src/ATen/native/xpu/XPUFallback.template
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,6 @@ static void xpu_lazy_registration_or_error_fallback(
}
}

static void xpu_force_fallback(
const c10::OperatorHandle& op,
torch::jit::Stack* stack) {}

TORCH_LIBRARY_IMPL(_, XPU, m) {
static const char* enable_xpu_fallback =
getenv("PYTORCH_ENABLE_XPU_FALLBACK");
Expand Down
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/IndexingUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ static std::tuple<Tensor, int64_t, int64_t, int64_t> computeLinearIndex(
static std::
tuple<Tensor, Tensor, int64_t, int64_t, int64_t, std::vector<int64_t>>
makeLinearIndex(Tensor self, IOptTensorListRef orig, bool check_range) {
checkIndexTensorTypes(orig, /*allow_int*/ true);
checkIndexTensorTypes(orig);
// first expand BoolTensor (masks) or ByteTensor (masks) into 1 or more
// LongTensors
auto indices = expandTensors(self, orig);
Expand Down
12 changes: 12 additions & 0 deletions test/xpu/extended/run_test_with_skip.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,18 @@
# Greatest absolute difference: 0.03125 at index (610,) (up to 0.001 allowed)
# Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
"test_compare_cpu_hypot_xpu_bfloat16",

# Regressions due to PyTorch uplift (Numeric difference in float and bfloat)
# https://github.com/intel/torch-xpu-ops/issues/549
# Example fail log
# FAILED test_ops_xpu.py::TestCommonXPU::test_compare_cpu_nn_functional_batch_norm_xpu_float16 - AssertionError: Tensor-likes are not close!
# Mismatched elements: 3 / 72 (4.2%)
# Greatest absolute difference: 0.0029296875 at index (0, 1, 1, 0) (up to 0.001 allowed)
# Greatest relative difference: 0.0032501220703125 at index (2, 1, 2, 1) (up to 0.001 allowed)
"test_compare_cpu_nn_functional_batch_norm_xpu_float16",
"test_compare_cpu_std_mean_xpu_bfloat16",
"test_compare_cpu_sub_xpu_float16",
"test_compare_cpu_var_mean_xpu_bfloat16",
)


Expand Down
21 changes: 12 additions & 9 deletions test/xpu/run_test_with_skip.py
Original file line number Diff line number Diff line change
Expand Up @@ -1380,6 +1380,11 @@ def launch_test(test_case, skip_list=None, exe_list=None):
# https://github.com/intel/torch-xpu-ops/issues/461
"test_index_put_src_datatype_xpu_float8_e5m2",
"test_index_put_src_datatype_xpu_float8_e4m3fn",

# Regression after PyTorch update
# http://github.com/intel/torch-xpu-ops/issues/549
# IndexError: tensors used as indices must be long, byte or bool tensors.
"test_index_ind_dtype_xpu",
)
res += launch_test("test_indexing_xpu.py", skip_list)

Expand Down Expand Up @@ -2995,23 +3000,21 @@ def launch_test(test_case, skip_list=None, exe_list=None):
res += launch_test("nn/test_convolution_xpu.py", skip_list)

# test_dynamic_shapes


res += launch_test("test_dynamic_shapes_xpu.py")
skip_list = (
# Regression after PyTorch uplift
# https://github.com/intel/torch-xpu-ops/issues/549
# AssertionError: 3 != 3.0
"test_symnode_hashing",
)
res += launch_test("test_dynamic_shapes_xpu.py", skip_list)

# test_load_state_dict


res += launch_test("nn/test_load_state_dict_xpu.py")

# test_module_hooks


res += launch_test("nn/test_module_hooks_xpu.py")

# test_parametrization


res += launch_test("nn/test_parametrization_xpu.py")

exit_code = os.WEXITSTATUS(res)
Expand Down

0 comments on commit 443fd35

Please sign in to comment.