diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index db6b570b17..f670cbd918 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -22,7 +22,7 @@ jobs:
       - name: Linting
         run: pre-commit run --all-files
       - name: Format c/cuda codes with clang-format
-        uses: DoozyX/clang-format-lint-action@v0.13
+        uses: DoozyX/clang-format-lint-action@v0.18
         with:
           source: mmcv/ops/csrc
           extensions: h,c,cpp,hpp,cu,cuh
diff --git a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh
index 537610416e..44fbc4f338 100644
--- a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh
@@ -222,9 +222,9 @@ __global__ void bezier_align_backward_cuda_kernel(
           atomicAdd(offset_bottom_diff + y_high * width + x_high,
                     static_cast<T>(g4));
         }  // if
-      }    // ix
-    }      // iy
-  }        // CUDA_1D_KERNEL_LOOP
+      }  // ix
+    }  // iy
+  }  // CUDA_1D_KERNEL_LOOP
 }  // BezierAlignBackward
 
 #endif  // BEZIER_ALIGN_CUDA_KERNEL_CUH
diff --git a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
index 4383d9e82c..4c0ff1f7c5 100644
--- a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
@@ -234,9 +234,9 @@ __global__ void riroi_align_rotated_backward_cuda_kernel(
                     g4 * l_var);
 
         }  // if
-      }    // ix
-    }      // iy
-  }        // CUDA_1D_KERNEL_LOOP
+      }  // ix
+    }  // iy
+  }  // CUDA_1D_KERNEL_LOOP
 }  // RiRoIAlignBackward
 
 #endif  // RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
diff --git a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
index 8274dc50c7..2fd465eaa2 100644
--- a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
@@ -194,9 +194,9 @@ __global__ void roi_align_rotated_backward_cuda_kernel(
           atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
           atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
         }  // if
-      }    // ix
-    }      // iy
-  }        // CUDA_1D_KERNEL_LOOP
+      }  // ix
+    }  // iy
+  }  // CUDA_1D_KERNEL_LOOP
 }  // RoIAlignBackward
 
 #endif  // ROI_ALIGN_ROTATED_CUDA_KERNEL_CUH
diff --git a/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp b/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp
index 8527372241..0518806208 100644
--- a/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp
+++ b/mmcv/ops/csrc/common/mlu/common_mlu_helper.hpp
@@ -33,7 +33,7 @@
 #define PAD_DOWN(x, y) (((x) / (y)) * (y))
 #endif
 
-#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))
+#define CEIL_ALIGN(x, y) (((x) + (y) - 1) / (y) * (y))
 
 template <typename scalar_t>
 __mlu_func__ inline scalar_t min(scalar_t a, scalar_t b) {
diff --git a/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp b/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp
index e49572ca84..1b264a9a1e 100644
--- a/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp
+++ b/mmcv/ops/csrc/common/pytorch_mlu_helper.hpp
@@ -21,9 +21,9 @@
 
 #define PAD_DOWN(x, y) (((x) / (y)) * (y))
 
-#define CEIL_DIV(x, y) (((x) + (y)-1) / (y))
+#define CEIL_DIV(x, y) (((x) + (y) - 1) / (y))
 
-#define CEIL_ALIGN(x, y) (((x) + (y)-1) / (y) * (y))
+#define CEIL_ALIGN(x, y) (((x) + (y) - 1) / (y) * (y))
 
 inline int32_t getJobLimitCapability() {
   CNcontext drv_ctx;
diff --git a/mmcv/ops/csrc/common/pytorch_npu_util.hpp b/mmcv/ops/csrc/common/pytorch_npu_util.hpp
index 3c3712a933..aed56ca1e0 100644
--- a/mmcv/ops/csrc/common/pytorch_npu_util.hpp
+++ b/mmcv/ops/csrc/common/pytorch_npu_util.hpp
@@ -468,7 +468,7 @@ void ReleaseConvertTypes(Tuple &t) {
 }
 
 template <typename... Ts>
-constexpr auto ConvertTypes(Ts &... args) {
+constexpr auto ConvertTypes(Ts &...args) {
   return std::make_tuple(ConvertType(args)...);
 }
 
@@ -506,7 +506,7 @@ void AddParamToBuf(const string &);
 void AddParamToBuf();
 
 template <typename T, typename... Args>
-void AddParamToBuf(const T &arg, Args &... args) {
+void AddParamToBuf(const T &arg, Args &...args) {
   AddParamToBuf(arg);
   AddParamToBuf(args...);
 }
diff --git a/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h b/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h
index f23ff44823..a4b9024715 100644
--- a/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h
+++ b/mmcv/ops/csrc/common/utils/spconv/paramsgrid.h
@@ -40,7 +40,7 @@ void assigner(TT &src, std::vector<int> counter, std::vector<scalar_t> &arg) {
 
 template <int Idx, class TT, class scalar_t, class... TArgs>
 void assigner(TT &src, std::vector<int> counter, std::vector<scalar_t> &arg,
-              std::vector<TArgs> &... args) {
+              std::vector<TArgs> &...args) {
   std::get<Idx>(src) = arg[counter[Idx]];
   assigner<Idx + 1>(src, counter, args...);
 }
diff --git a/mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h b/mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h
index 66e01a8ed1..88eca8084c 100644
--- a/mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h
+++ b/mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h
@@ -108,7 +108,7 @@ struct CPU {};
 template <typename scalar_t, size_t MaxDim = TV_MAX_DIM>
 struct SimpleVector {
  public:
-  TV_HOST_DEVICE_INLINE SimpleVector(){};
+  TV_HOST_DEVICE_INLINE SimpleVector() {};
   TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<scalar_t> q) {
     TV_ASSERT(q.size() <= MaxDim);
     mSize = 0;
@@ -315,7 +315,7 @@ struct Slice {
 
 template <size_t MaxDim = TV_MAX_DIM>
 struct ShapeBase : public SimpleVector<int, MaxDim> {
-  TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>(){};
+  TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>() {};
   TV_HOST_DEVICE_INLINE ShapeBase(std::initializer_list<int> shape)
       : SimpleVector<int, MaxDim>(shape) {}
 
diff --git a/mmcv/ops/csrc/pytorch/cpu/bezier_align.cpp b/mmcv/ops/csrc/pytorch/cpu/bezier_align.cpp
index 7eb0e5b940..21e2c5fc87 100644
--- a/mmcv/ops/csrc/pytorch/cpu/bezier_align.cpp
+++ b/mmcv/ops/csrc/pytorch/cpu/bezier_align.cpp
@@ -220,9 +220,9 @@ void BezierAlignForward(const int nthreads, const T *input, const T *rois,
 
           output[index] = output_val;
         }  // for pw
-      }    // for ph
-    }      // for c
-  }        // for n
+      }  // for ph
+    }  // for c
+  }  // for n
 }
 
 template <typename T>
@@ -381,9 +381,9 @@ void BezierAlignBackward(const int nthreads, const T *grad_output,
           add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
           add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
         }  // if
-      }    // ix
-    }      // iy
-  }        // for
+      }  // ix
+    }  // iy
+  }  // for
 }  // BezierAlignBackward
 
 void BezierAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
diff --git a/mmcv/ops/csrc/pytorch/cpu/roi_align.cpp b/mmcv/ops/csrc/pytorch/cpu/roi_align.cpp
index d545390645..fc17a0b122 100644
--- a/mmcv/ops/csrc/pytorch/cpu/roi_align.cpp
+++ b/mmcv/ops/csrc/pytorch/cpu/roi_align.cpp
@@ -207,10 +207,10 @@ void ROIAlignForward(const int nthreads, const T* input, const T* rois,
             // We do average (integral) pooling inside a bin
             output[index] = output_val / count;
           }  // if
-        }    // for pw
-      }      // for ph
-    }        // for c
-  }          // for n
+        }  // for pw
+      }  // for ph
+    }  // for c
+  }  // for n
 }
 
 template <typename T>
@@ -334,7 +334,7 @@ void ROIAlignBackward(const int nthreads, const T* grad_output, const T* rois,
           add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
           add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
         }  // if
-      }    // mode
+      }  // mode
     } else if (pool_mode == 1) {
       // We do average (integral) pooling inside a bin
       // We use roi_bin_grid to sample the grid and mimic integral
@@ -375,10 +375,10 @@ void ROIAlignBackward(const int nthreads, const T* grad_output, const T* rois,
             add(offset_grad_input + y_high * width + x_high,
                 static_cast<T>(g4));
           }  // if
-        }    // ix
-      }      // iy
-    }        // mode
-  }          // for
+        }  // ix
+      }  // iy
+    }  // mode
+  }  // for
 }  // ROIAlignBackward
 
 void ROIAlignForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
diff --git a/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp b/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
index 8c849de0cb..6e09ed1517 100644
--- a/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
+++ b/mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
@@ -206,9 +206,9 @@ void ROIAlignRotatedForward(const int nthreads, const T* input,
 
           output[index] = output_val;
         }  // for pw
-      }    // for ph
-    }      // for c
-  }        // for n
+      }  // for ph
+    }  // for c
+  }  // for n
 }
 
 template <typename T>
@@ -366,9 +366,9 @@ void ROIAlignRotatedBackward(
           add(offset_grad_input + y_high * width + x_low, static_cast<T>(g3));
           add(offset_grad_input + y_high * width + x_high, static_cast<T>(g4));
         }  // if
-      }    // ix
-    }      // iy
-  }        // for
+      }  // ix
+    }  // iy
+  }  // for
 }  // ROIAlignRotatedBackward
 
 void ROIAlignRotatedForwardCPULauncher(Tensor input, Tensor rois, Tensor output,
diff --git a/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu b/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu
index cee9b26168..e1040fd681 100644
--- a/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu
+++ b/mmcv/ops/csrc/pytorch/cuda/filtered_lrelu.cu
@@ -100,8 +100,9 @@ void *choose_filtered_lrelu_act_kernel(void);
 //------------------------------------------------------------------------
 // Helpers.
 
-enum              // Filter modes.
-{ MODE_SUSD = 0,  // Separable upsampling, separable downsampling.
+enum  // Filter modes.
+{
+  MODE_SUSD = 0,  // Separable upsampling, separable downsampling.
   MODE_FUSD = 1,  // Full upsampling, separable downsampling.
   MODE_SUFD = 2,  // Separable upsampling, full downsampling.
   MODE_FUFD = 3,  // Full upsampling, full downsampling.
@@ -157,12 +158,11 @@ struct InternalType<c10::Half> {
 
 #define MIN(A, B) ((A) < (B) ? (A) : (B))
 #define MAX(A, B) ((A) > (B) ? (A) : (B))
-#define CEIL_DIV(A, B)                                   \
-  (((B) == 1)                                            \
-       ? (A)                                             \
-       : ((B) == 2) ? ((int)((A) + 1) >> 1)              \
-                    : ((B) == 4) ? ((int)((A) + 3) >> 2) \
-                                 : (((A) + ((A) > 0 ? (B)-1 : 0)) / (B)))
+#define CEIL_DIV(A, B)                  \
+  (((B) == 1)   ? (A)                   \
+   : ((B) == 2) ? ((int)((A) + 1) >> 1) \
+   : ((B) == 4) ? ((int)((A) + 3) >> 2) \
+                : (((A) + ((A) > 0 ? (B) - 1 : 0)) / (B)))
 
 // This works only up to blocks of size 256 x 256 and for all N that are powers
 // of two.
@@ -333,22 +333,16 @@ static __global__ void filtered_lrelu_kernel(filtered_lrelu_kernel_params p) {
   const int szDownX = tileUpH * tileOutW;
 
   // Sizes for shared memory arrays.
-  const int s_buf0_size_base =
-      (filterMode == MODE_SUSD)
-          ? MAX(szIn, szUpXY)
-          : (filterMode == MODE_FUSD)
-                ? MAX(szIn, szDownX)
-                : (filterMode == MODE_SUFD)
-                      ? MAX(szIn, szUpXY)
-                      : (filterMode == MODE_FUFD) ? szIn : -1;
-  const int s_buf1_size_base =
-      (filterMode == MODE_SUSD)
-          ? MAX(szUpX, szDownX)
-          : (filterMode == MODE_FUSD)
-                ? szUpXY
-                : (filterMode == MODE_SUFD)
-                      ? szUpX
-                      : (filterMode == MODE_FUFD) ? szUpXY : -1;
+  const int s_buf0_size_base = (filterMode == MODE_SUSD)   ? MAX(szIn, szUpXY)
+                               : (filterMode == MODE_FUSD) ? MAX(szIn, szDownX)
+                               : (filterMode == MODE_SUFD) ? MAX(szIn, szUpXY)
+                               : (filterMode == MODE_FUFD) ? szIn
+                                                           : -1;
+  const int s_buf1_size_base = (filterMode == MODE_SUSD)   ? MAX(szUpX, szDownX)
+                               : (filterMode == MODE_FUSD) ? szUpXY
+                               : (filterMode == MODE_SUFD) ? szUpX
+                               : (filterMode == MODE_FUFD) ? szUpXY
+                                                           : -1;
 
   // Ensure U128 alignment.
   const int s_buf0_size = (s_buf0_size_base + 3) & ~3;
@@ -980,17 +974,17 @@ static __global__ void filtered_lrelu_kernel(filtered_lrelu_kernel_params p) {
 
 #define X_LOOP(TAPY, PX)                                             \
   for (int sx = 0; sx < fuSize / up; sx++) {                         \
-    v.x += a * (scalar_t)c_fu[(sx * up + (((PX)-0) & (up - 1))) +    \
+    v.x += a * (scalar_t)c_fu[(sx * up + (((PX) - 0) & (up - 1))) +  \
                               (sy * up + (TAPY)) * MAX_FILTER_SIZE]; \
-    v.z += b * (scalar_t)c_fu[(sx * up + (((PX)-0) & (up - 1))) +    \
+    v.z += b * (scalar_t)c_fu[(sx * up + (((PX) - 0) & (up - 1))) +  \
                               (sy * up + (TAPY)) * MAX_FILTER_SIZE]; \
     if ((PX) == 0) {                                                 \
       a = b;                                                         \
       b = s_tileIn[src0 + 2 + sx + sy * tileInW];                    \
     }                                                                \
-    v.y += a * (scalar_t)c_fu[(sx * up + (((PX)-1) & (up - 1))) +    \
+    v.y += a * (scalar_t)c_fu[(sx * up + (((PX) - 1) & (up - 1))) +  \
                               (sy * up + (TAPY)) * MAX_FILTER_SIZE]; \
-    v.w += b * (scalar_t)c_fu[(sx * up + (((PX)-1) & (up - 1))) +    \
+    v.w += b * (scalar_t)c_fu[(sx * up + (((PX) - 1) & (up - 1))) +  \
                               (sy * up + (TAPY)) * MAX_FILTER_SIZE]; \
     if ((PX) == 1) {                                                 \
       a = b;                                                         \
@@ -1447,7 +1441,7 @@ static __global__ void filtered_lrelu_act_kernel(
         s |= __shfl_xor(s, 4);
         s |= __shfl_xor(s, 8);
 #else
-        s |= __shfl_xor_sync(m, s, 1);                  // Distribute.
+        s |= __shfl_xor_sync(m, s, 1);  // Distribute.
         s |= __shfl_xor_sync(m, s, 2);
         s |= __shfl_xor_sync(m, s, 4);
         s |= __shfl_xor_sync(m, s, 8);
diff --git a/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu b/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu
index 2f1ae0a683..63f6e11e94 100644
--- a/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu
+++ b/mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu
@@ -225,9 +225,9 @@ static __global__ void upfirdn2d_kernel_small(upfirdn2d_kernel_params p) {
         scalar_t v = 0;
         if (inX >= 0 & inY >= 0 & inX < p.inSize.x & inY < p.inSize.y &
             c < p.inSize.z)
-          v = (scalar_t)(
-              (const T *)p.x)[inX * p.inStride.x + inY * p.inStride.y +
-                              c * p.inStride.z + n * p.inStride.w];
+          v = (scalar_t)((const T *)
+                             p.x)[inX * p.inStride.x + inY * p.inStride.y +
+                                  c * p.inStride.z + n * p.inStride.w];
         sx[relInY][relInX][relC] = v;
       }
 
diff --git a/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp b/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
index 4a110ae07c..c93c0635cf 100644
--- a/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/bbox_overlaps_npu.cpp
@@ -26,7 +26,7 @@ void bbox_overlaps_npu(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
     gtboxesFP32 = gtboxesFP32.to(at::kFloat);
   }
   c10::SmallVector<int64_t, 8> iousSize = {gtboxesFP32.size(0),
-                                              bboxesFP32.size(0)};
+                                           bboxesFP32.size(0)};
   if (aligned) {
     iousSize = {gtboxesFP32.size(0), 1};
   }
diff --git a/mmcv/ops/csrc/pytorch/npu/box_iou_quadri_npu.cpp b/mmcv/ops/csrc/pytorch/npu/box_iou_quadri_npu.cpp
index 6baf44f448..84e2c0a678 100644
--- a/mmcv/ops/csrc/pytorch/npu/box_iou_quadri_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/box_iou_quadri_npu.cpp
@@ -4,11 +4,10 @@ using namespace NPU_NAME_SPACE;
 using namespace std;
 
 void box_iou_quadri_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                          const int mode_flag, const bool aligned);
+                         const int mode_flag, const bool aligned);
 
 void box_iou_quadri_npu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                         const int mode_flag, const bool aligned) {
-
+                        const int mode_flag, const bool aligned) {
   TORCH_CHECK(boxes1.size(1) == 8, "boxes1 must be 2D tensor (N, 8)");
   TORCH_CHECK(boxes1.size(1) == 8, "boxes1 must be 2D tensor (N, 8)");
 
diff --git a/mmcv/ops/csrc/pytorch/npu/box_iou_rotated_npu.cpp b/mmcv/ops/csrc/pytorch/npu/box_iou_rotated_npu.cpp
index d8b0bbaa67..5b229a3926 100644
--- a/mmcv/ops/csrc/pytorch/npu/box_iou_rotated_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/box_iou_rotated_npu.cpp
@@ -8,14 +8,13 @@ void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
 
 void box_iou_rotated_npu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                          const int mode_flag, const bool aligned) {
-
   TORCH_CHECK(boxes1.size(1) == 5, "boxes1 must be 2D tensor (N, 5)");
   TORCH_CHECK(boxes1.size(1) == 5, "boxes1 must be 2D tensor (N, 5)");
 
   auto trans = false;
   auto is_clockwise = false;
   EXEC_NPU_CMD(aclnnBoxesOverlapBev, boxes1, boxes2, trans, is_clockwise,
-                aligned, mode_flag, ious);
+               aligned, mode_flag, ious);
   return;
 }
 
diff --git a/mmcv/ops/csrc/pytorch/npu/boxes_overlap_bev_npu.cpp b/mmcv/ops/csrc/pytorch/npu/boxes_overlap_bev_npu.cpp
index 6bc6273083..0d97df6ad7 100644
--- a/mmcv/ops/csrc/pytorch/npu/boxes_overlap_bev_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/boxes_overlap_bev_npu.cpp
@@ -10,16 +10,17 @@ void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a,
 void iou3d_boxes_overlap_bev_forward_npu(const int num_a, const Tensor boxes_a,
                                          const int num_b, const Tensor boxes_b,
                                          Tensor ans_overlap) {
+  TORCH_CHECK(boxes_a.size(1) == 7, "boxes_a must be 2D tensor (N, 7)");
+  TORCH_CHECK(boxes_b.size(1) == 7, "boxes_b must be 2D tensor (N, 7)");
 
-    TORCH_CHECK(boxes_a.size(1) == 7, "boxes_a must be 2D tensor (N, 7)");
-    TORCH_CHECK(boxes_b.size(1) == 7, "boxes_b must be 2D tensor (N, 7)");
-
-    auto trans = false;
-    auto is_clockwise = false;
-    auto aligned = false;
-    auto mode_flag = 2;
-    EXEC_NPU_CMD(aclnnBoxesOverlapBev, boxes_a, boxes_b, trans, is_clockwise, aligned, mode_flag, ans_overlap);
-    return;
+  auto trans = false;
+  auto is_clockwise = false;
+  auto aligned = false;
+  auto mode_flag = 2;
+  EXEC_NPU_CMD(aclnnBoxesOverlapBev, boxes_a, boxes_b, trans, is_clockwise,
+               aligned, mode_flag, ans_overlap);
+  return;
 }
 
-REGISTER_NPU_IMPL(iou3d_boxes_overlap_bev_forward_impl, iou3d_boxes_overlap_bev_forward_npu);
+REGISTER_NPU_IMPL(iou3d_boxes_overlap_bev_forward_impl,
+                  iou3d_boxes_overlap_bev_forward_npu);
diff --git a/mmcv/ops/csrc/pytorch/npu/points_in_box_npu_all.cpp b/mmcv/ops/csrc/pytorch/npu/points_in_box_npu_all.cpp
index ae3befea63..e3cc9284cc 100644
--- a/mmcv/ops/csrc/pytorch/npu/points_in_box_npu_all.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/points_in_box_npu_all.cpp
@@ -4,16 +4,17 @@ using namespace NPU_NAME_SPACE;
 using namespace std;
 
 void points_in_boxes_all_forward_impl_npu(int batch_size, int boxes_num,
-                                           int pts_num, const Tensor boxes,
-                                           const Tensor pts,
-                                           Tensor box_idx_of_points) {
-  c10::SmallVector<int64_t, 8> output_size = {pts.size(0), pts.size(1), boxes.size(1)};
+                                          int pts_num, const Tensor boxes,
+                                          const Tensor pts,
+                                          Tensor box_idx_of_points) {
+  c10::SmallVector<int64_t, 8> output_size = {pts.size(0), pts.size(1),
+                                              boxes.size(1)};
   auto boxes_trans = boxes.transpose(1, 2).contiguous();
   EXEC_NPU_CMD(aclnnPointsInBoxAll, boxes_trans, pts, box_idx_of_points);
 }
 void points_in_boxes_all_forward_impl(int batch_size, int boxes_num,
-                                       int pts_num, const Tensor boxes,
-                                       const Tensor pts,
-                                       Tensor box_idx_of_points);
+                                      int pts_num, const Tensor boxes,
+                                      const Tensor pts,
+                                      Tensor box_idx_of_points);
 REGISTER_NPU_IMPL(points_in_boxes_all_forward_impl,
                   points_in_boxes_all_forward_impl_npu);
diff --git a/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_v2_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_v2_npu.cpp
index b2ea93b261..f9fac97397 100644
--- a/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_v2_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/roi_align_rotated_v2_npu.cpp
@@ -4,49 +4,41 @@ using namespace NPU_NAME_SPACE;
 using namespace std;
 
 void roi_align_rotated_v2_forward_npu(const Tensor input, Tensor rois_map,
-                                    Tensor output,
-                                    double spatial_scale,
-                                    int32_t sampling_ratio,
-                                    int32_t pooled_height,
-                                    int32_t pooled_width,
-                                    bool aligned,
-                                    bool clockwise) {
+                                      Tensor output, double spatial_scale,
+                                      int32_t sampling_ratio,
+                                      int32_t pooled_height,
+                                      int32_t pooled_width, bool aligned,
+                                      bool clockwise) {
   at::Tensor feature_map = input.permute({0, 2, 3, 1}).contiguous();
   at::Tensor rois = rois_map.permute({1, 0}).contiguous();
-  EXEC_NPU_CMD(aclnnRoiAlignRotatedV2, feature_map, rois, spatial_scale, sampling_ratio, pooled_height, pooled_width, aligned, clockwise, output);
+  EXEC_NPU_CMD(aclnnRoiAlignRotatedV2, feature_map, rois, spatial_scale,
+               sampling_ratio, pooled_height, pooled_width, aligned, clockwise,
+               output);
 }
 
 void roi_align_rotated_v2_forward_impl(const Tensor input, Tensor rois,
-                                    Tensor output,
-                                    double spatial_scale,
-                                    int32_t sampling_ratio,
-                                    int32_t pooled_height,
-                                    int32_t pooled_width,
-                                    bool aligned,
-                                    bool clockwise);
+                                       Tensor output, double spatial_scale,
+                                       int32_t sampling_ratio,
+                                       int32_t pooled_height,
+                                       int32_t pooled_width, bool aligned,
+                                       bool clockwise);
 
-REGISTER_NPU_IMPL(roi_align_rotated_v2_forward_impl, roi_align_rotated_v2_forward_npu);
+REGISTER_NPU_IMPL(roi_align_rotated_v2_forward_impl,
+                  roi_align_rotated_v2_forward_npu);
 
-void roi_align_rotated_v2_backward_npu(const Tensor input, Tensor rois,
-                                    Tensor grad_output, Tensor grad_input,
-                                    int32_t pooled_height,
-                                    int32_t pooled_width,
-                                    double spatial_scale,
-                                    int32_t sampling_ratio,
-                                    bool aligned,
-                                    bool clockwise) {
+void roi_align_rotated_v2_backward_npu(
+    const Tensor input, Tensor rois, Tensor grad_output, Tensor grad_input,
+    int32_t pooled_height, int32_t pooled_width, double spatial_scale,
+    int32_t sampling_ratio, bool aligned, bool clockwise) {
   EXEC_NPU_CMD(aclnnRoiAlignRotatedGradV2, input, rois, grad_output,
-               pooled_height, pooled_width, spatial_scale, sampling_ratio, aligned, clockwise,
-               grad_input);
+               pooled_height, pooled_width, spatial_scale, sampling_ratio,
+               aligned, clockwise, grad_input);
 }
 
-void roi_align_rotated_v2_backward_impl(const Tensor input, Tensor rois,
-                                    Tensor grad_output, Tensor grad_input,
-                                    int32_t pooled_height,
-                                    int32_t pooled_width,
-                                    double spatial_scale,
-                                    int32_t sampling_ratio,
-                                    bool aligned,
-                                    bool clockwise);
+void roi_align_rotated_v2_backward_impl(
+    const Tensor input, Tensor rois, Tensor grad_output, Tensor grad_input,
+    int32_t pooled_height, int32_t pooled_width, double spatial_scale,
+    int32_t sampling_ratio, bool aligned, bool clockwise);
 
-REGISTER_NPU_IMPL(roi_align_rotated_v2_backward_impl, roi_align_rotated_v2_backward_npu);
+REGISTER_NPU_IMPL(roi_align_rotated_v2_backward_impl,
+                  roi_align_rotated_v2_backward_npu);
diff --git a/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp b/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
index 2fc645c62b..7d9e4b3c9d 100644
--- a/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
@@ -19,7 +19,7 @@ void roipoint_pool3d_forward_impl_npu(int batch_size, int pts_num,
   at::Tensor pooled_features_trans =
       at::empty(features_trans_size, xyz.options());
   c10::SmallVector<int64_t, 8> empty_flag_size = {boxes3d.size(0),
-                                                     boxes3d.size(1)};
+                                                  boxes3d.size(1)};
   EXEC_NPU_CMD(aclnnRoipointPool3dForward, points_trans, point_features_trans,
                boxes3d, sampled_pts_num, pooled_features_trans,
                pooled_empty_flag);
diff --git a/mmcv/ops/csrc/pytorch/pybind.cpp b/mmcv/ops/csrc/pytorch/pybind.cpp
index c56c6437d1..864630f838 100644
--- a/mmcv/ops/csrc/pytorch/pybind.cpp
+++ b/mmcv/ops/csrc/pytorch/pybind.cpp
@@ -209,13 +209,15 @@ void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
                         int sampling_ratio, int pool_mode, bool aligned);
 
 void roi_align_rotated_v2_forward(Tensor input, Tensor rois, Tensor output,
-                               double spatial_scale, int sampling_ratio,
-                               int aligned_height, int aligned_width,
-                               bool aligned, bool clockwise);
+                                  double spatial_scale, int sampling_ratio,
+                                  int aligned_height, int aligned_width,
+                                  bool aligned, bool clockwise);
 
-void roi_align_rotated_v2_backward(Tensor input, Tensor rois, Tensor grad_output, Tensor grad_input,
-                               int pooled_height, int pooled_width, double spatial_scale,
-                               int sampling_ratio, bool aligned, bool clockwise);
+void roi_align_rotated_v2_backward(Tensor input, Tensor rois,
+                                   Tensor grad_output, Tensor grad_input,
+                                   int pooled_height, int pooled_width,
+                                   double spatial_scale, int sampling_ratio,
+                                   bool aligned, bool clockwise);
 
 void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
                       int pooled_height, int pooled_width, float spatial_scale);
@@ -804,13 +806,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("roi_align_rotated_v2_forward", &roi_align_rotated_v2_forward,
         "roi_align_rotated_v2_forward", py::arg("input"), py::arg("rois"),
         py::arg("output"), py::arg("spatial_scale"), py::arg("sampling_ratio"),
-        py::arg("pooled_height"), py::arg("pooled_width"),
-        py::arg("aligned"), py::arg("clockwise"));
+        py::arg("pooled_height"), py::arg("pooled_width"), py::arg("aligned"),
+        py::arg("clockwise"));
   m.def("roi_align_rotated_v2_backward", &roi_align_rotated_v2_backward,
         "roi_align_rotated_v2_backward", py::arg("input"), py::arg("rois"),
         py::arg("grad_output"), py::arg("grad_input"), py::arg("pooled_height"),
-        py::arg("pooled_width"), py::arg("spatial_scale"), py::arg("sampling_ratio"),
-        py::arg("aligned"), py::arg("clockwise"));
+        py::arg("pooled_width"), py::arg("spatial_scale"),
+        py::arg("sampling_ratio"), py::arg("aligned"), py::arg("clockwise"));
   m.def("dynamic_point_to_voxel_forward", &dynamic_point_to_voxel_forward,
         "dynamic_point_to_voxel_forward", py::arg("feats"), py::arg("coors"),
         py::arg("reduce_type"));
diff --git a/mmcv/ops/csrc/pytorch/roi_align_rotated_v2.cpp b/mmcv/ops/csrc/pytorch/roi_align_rotated_v2.cpp
index 7743775288..9b0a623530 100644
--- a/mmcv/ops/csrc/pytorch/roi_align_rotated_v2.cpp
+++ b/mmcv/ops/csrc/pytorch/roi_align_rotated_v2.cpp
@@ -3,35 +3,40 @@
 #include "pytorch_device_registry.hpp"
 
 void roi_align_rotated_v2_forward_impl(Tensor input, Tensor rois, Tensor output,
-                                    double spatial_scale, int sampling_ratio,
-                                    int pooled_height, int pooled_width,
-                                    bool aligned, bool clockwise) {
+                                       double spatial_scale, int sampling_ratio,
+                                       int pooled_height, int pooled_width,
+                                       bool aligned, bool clockwise) {
   DISPATCH_DEVICE_IMPL(roi_align_rotated_v2_forward_impl, input, rois, output,
-                       spatial_scale, sampling_ratio, pooled_height, pooled_width,
-                       aligned, clockwise);
+                       spatial_scale, sampling_ratio, pooled_height,
+                       pooled_width, aligned, clockwise);
 }
 
-
 void roi_align_rotated_v2_forward(Tensor input, Tensor rois, Tensor output,
-                               double spatial_scale, int sampling_ratio,
-                               int pooled_height, int pooled_width,
-                               bool aligned, bool clockwise) {
-  roi_align_rotated_v2_forward_impl(input, rois, output, spatial_scale, sampling_ratio,
-                                    pooled_height, pooled_width, aligned, clockwise);
+                                  double spatial_scale, int sampling_ratio,
+                                  int pooled_height, int pooled_width,
+                                  bool aligned, bool clockwise) {
+  roi_align_rotated_v2_forward_impl(input, rois, output, spatial_scale,
+                                    sampling_ratio, pooled_height, pooled_width,
+                                    aligned, clockwise);
 }
 
-
-void roi_align_rotated_v2_backward_impl(Tensor input, Tensor rois, Tensor grad_output, Tensor grad_input,
-                                    int pooled_height, int pooled_width, double spatial_scale,
-                                    int sampling_ratio, bool aligned, bool clockwise) {
-  DISPATCH_DEVICE_IMPL(roi_align_rotated_v2_backward_impl, input, rois, grad_output, grad_input,
-                    pooled_height, pooled_width, spatial_scale, sampling_ratio, aligned, clockwise);
+void roi_align_rotated_v2_backward_impl(Tensor input, Tensor rois,
+                                        Tensor grad_output, Tensor grad_input,
+                                        int pooled_height, int pooled_width,
+                                        double spatial_scale,
+                                        int sampling_ratio, bool aligned,
+                                        bool clockwise) {
+  DISPATCH_DEVICE_IMPL(roi_align_rotated_v2_backward_impl, input, rois,
+                       grad_output, grad_input, pooled_height, pooled_width,
+                       spatial_scale, sampling_ratio, aligned, clockwise);
 }
 
-
-void roi_align_rotated_v2_backward(Tensor input, Tensor rois, Tensor grad_output, Tensor grad_input,
-                                    int pooled_height, int pooled_width, double spatial_scale,
-                                    int sampling_ratio, bool aligned, bool clockwise) {
+void roi_align_rotated_v2_backward(Tensor input, Tensor rois,
+                                   Tensor grad_output, Tensor grad_input,
+                                   int pooled_height, int pooled_width,
+                                   double spatial_scale, int sampling_ratio,
+                                   bool aligned, bool clockwise) {
   roi_align_rotated_v2_backward_impl(input, rois, grad_output, grad_input,
-                                    pooled_height, pooled_width, spatial_scale, sampling_ratio, aligned, clockwise);
+                                     pooled_height, pooled_width, spatial_scale,
+                                     sampling_ratio, aligned, clockwise);
 }