diff --git a/.github/workflows/build_push.yml b/.github/workflows/build_push.yml
new file mode 100644
index 0000000..e351c42
--- /dev/null
+++ b/.github/workflows/build_push.yml
@@ -0,0 +1,28 @@
+name: "Build"
+# One live run per workflow and ref: a newer push cancels the run still in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+# Branch pushes and pull requests trigger a build; ci_test_* branches and all tags are skipped.
+on:
+  push:
+    branches:
+      - '*'
+      - '!ci_test_*'
+    tags-ignore:
+      - '*'
+  pull_request:
+    branches:
+      - '*'
+      - '!ci_test_*'
+# Compile with every feature enabled, then rehearse publishing from the root Cargo.toml.
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - run: cargo build --all-features
+      - name: Test release pipeline
+        run: cargo publish --dry-run --manifest-path Cargo.toml
\ No newline at end of file
diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml
new file mode 100644
index 0000000..48a7444
--- /dev/null
+++ b/.github/workflows/publish_release.yml
@@ -0,0 +1,35 @@
+name: Create Release
+# One live run per workflow and ref: re-pushing a tag cancels the run still in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+# Pushing a tag that matches `*` starts a release.
+on:
+  push:
+    tags:
+      - '*'
+# The crate is published first; the GitHub release job below waits on it via `needs`.
+jobs:
+  build_and_publish:
+    name: Build
+    runs-on: ubuntu-latest
+    environment: Cargo
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - name: Make a release
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_TOKEN }}
+        run: cargo publish --manifest-path Cargo.toml
+  # Creates the GitHub release for the pushed tag, using CHANGELOG.md as its body.
+  release:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    needs:
+      - build_and_publish
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ncipollo/release-action@v1
+        with:
+          bodyFile: 'CHANGELOG.md'
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a0038a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+.idea
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..4d03924
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1 @@
+Added conversions
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
new file mode 100644
index 0000000..2fb71b2
--- /dev/null
+++ b/Cargo.lock
@@ -0,0 +1,7 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "yuvutils-rs"
+version = "0.1.0"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..8c25a30
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "yuvutils-rs"
+version = "0.1.0"
+edition = "2021"
+description = "Rust utilities for YUV format handling and conversion."
+readme = "README.md"
+keywords = ["yuv"]
+license = "Apache-2.0 OR BSD-3-Clause"
+authors = ["Radzivon Bartoshyk"]
+documentation = "https://github.com/awxkee/yuvutils-rs"
+categories = ["multimedia::images", "multimedia::video"]
+homepage = "https://github.com/awxkee/yuvutils-rs"
+repository = "https://github.com/awxkee/yuvutils-rs"
+exclude = ["*.jpg"]
+
+[dependencies]
diff --git a/LICENSE-BSD.md b/LICENSE-BSD.md
new file mode 100644
index 0000000..bf616fd
--- /dev/null
+++ b/LICENSE-BSD.md
@@ -0,0 +1,26 @@
+Copyright (c) Radzivon Bartoshyk. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1.  Redistributions of source code must retain the above copyright notice, this
+    list of conditions and the following disclaimer.
+
+2.  Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+3.  Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/LICENSE.md b/LICENSE.md
new file mode 100644
index 0000000..86a13a8
--- /dev/null
+++ b/LICENSE.md
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2024 Radzivon Bartoshyk
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..12cf7ac
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+# Rust utilities for YUV format handling and conversion.
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..1dd020d
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,42 @@
+mod rgba_to_yuv;
+mod yuv_nv12;
+mod yuv_nv12_p10;
+mod yuv_support;
+mod yuv_to_rgba;
+// The modules above stay private; the `pub use` items below form the crate's public API.
+pub use yuv_support::YuvStandardMatrix;
+pub use yuv_support::YuvRange;
+// Converters re-exported from `yuv_nv12_p10` (NV12/NV16 P10 sources to BGRA, going by their names).
+pub use yuv_nv12_p10::yuv_nv12_p10_to_bgra_be;
+pub use yuv_nv12_p10::yuv_nv16_p10_to_bgra_be;
+pub use yuv_nv12_p10::yuv_nv12_p10_to_bgra;
+pub use yuv_nv12_p10::yuv_nv16_p10_to_bgra;
+pub use yuv_nv12_p10::yuv_nv12_p10_msb_to_bgra;
+pub use yuv_nv12_p10::yuv_nv16_p10_msb_to_bgra;
+// Converters re-exported from `yuv_nv12` (NV12/NV21 sources to BGRA, RGBA and RGB, going by their names).
+pub use yuv_nv12::yuv_nv12_to_bgra;
+pub use yuv_nv12::yuv_nv21_to_bgra;
+pub use yuv_nv12::yuv_nv12_to_rgba;
+pub use yuv_nv12::yuv_nv21_to_rgba;
+pub use yuv_nv12::yuv_nv12_to_rgb;
+pub use yuv_nv12::yuv_nv21_to_rgb;
+// Converters re-exported from `yuv_to_rgba` (4:2:0 / 4:2:2 / 4:4:4 sources to RGB, RGBA and BGRA, going by their names).
+pub use yuv_to_rgba::yuv420_to_rgb;
+pub use yuv_to_rgba::yuv420_to_rgba;
+pub use yuv_to_rgba::yuv420_to_bgra;
+pub use yuv_to_rgba::yuv422_to_rgb;
+pub use yuv_to_rgba::yuv422_to_rgba;
+pub use yuv_to_rgba::yuv422_to_bgra;
+pub use yuv_to_rgba::yuv444_to_rgba;
+pub use yuv_to_rgba::yuv444_to_bgra;
+pub use yuv_to_rgba::yuv444_to_rgb;
+// Converters re-exported from `rgba_to_yuv` (RGB, RGBA and BGRA sources to planar 4:2:0 / 4:2:2 / 4:4:4).
+pub use rgba_to_yuv::rgb_to_yuv420;
+pub use rgba_to_yuv::rgba_to_yuv420;
+pub use rgba_to_yuv::bgra_to_yuv420;
+pub use rgba_to_yuv::rgb_to_yuv422;
+pub use rgba_to_yuv::rgba_to_yuv422;
+pub use rgba_to_yuv::bgra_to_yuv422;
+pub use rgba_to_yuv::rgb_to_yuv444;
+pub use rgba_to_yuv::rgba_to_yuv444;
+pub use rgba_to_yuv::bgra_to_yuv444;
\ No newline at end of file
diff --git a/src/rgba_to_yuv.rs b/src/rgba_to_yuv.rs
new file mode 100644
index 0000000..307b637
--- /dev/null
+++ b/src/rgba_to_yuv.rs
@@ -0,0 +1,763 @@
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::{
+    uint8x16_t, vcombine_u16, vcombine_u8, vdupq_n_s16, vdupq_n_s32, vget_low_s16,
+    vget_low_u8, vld3q_u8, vld4q_u8, vmaxq_s32, vmlal_high_s16, vmlal_s16,
+    vmovl_high_u8, vmovl_u8, vpaddlq_u8, vqmovn_u16, vqshrun_n_s32,
+    vreinterpretq_s16_u16, vshrn_n_u16, vst1_u8, vst1q_u8,
+};
+
+use crate::yuv_support::{
+    get_forward_transform, get_kr_kb, get_yuv_range, ToIntegerTransform, YuvChromaSample, YuvRange,
+    YuvSourceChannels, YuvStandardMatrix,
+};
+/// Converts interleaved RGB, RGBA or BGRA rows to planar 8-bit Y, U and V, subsampling chroma as `SAMPLING` selects.
+fn rgbx_to_yuv8<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
+    y_plane: &mut [u8],
+    y_stride: u32, // bytes from one Y row to the next
+    u_plane: &mut [u8],
+    u_stride: u32, // bytes from one U row to the next
+    v_plane: &mut [u8],
+    v_stride: u32, // bytes from one V row to the next
+    rgba: &[u8],
+    rgba_stride: u32, // bytes from one source row to the next
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    let chroma_subsampling: YuvChromaSample = SAMPLING.into();
+    let source_channels: YuvSourceChannels = ORIGIN_CHANNELS.into();
+    let channels = source_channels.get_channels_count();
+    let range = get_yuv_range(8, range);
+    let kr_kb = get_kr_kb(matrix);
+    let max_range_p8 = (2f32.powi(8) - 1f32) as u32;
+    let transform_precise = get_forward_transform(
+        max_range_p8,
+        range.range_y,
+        range.range_uv,
+        kr_kb.kr,
+        kr_kb.kb,
+    );
+    let transform = transform_precise.to_integers(8);
+    let precision_scale = (1 << 8) as f32; // 8 fractional bits; the +0.5 below rounds the final `>> 8`
+    let bias_y = ((range.bias_y as f32 + 0.5f32) * precision_scale) as i32;
+    let bias_uv = ((range.bias_uv as f32 + 0.5f32) * precision_scale) as i32;
+    // 4:2:0 and 4:2:2 keep one chroma sample per two pixels, so the scalar loop walks pixel pairs.
+    let iterator_step = match chroma_subsampling {
+        YuvChromaSample::YUV420 => 2usize,
+        YuvChromaSample::YUV422 => 2usize,
+        YuvChromaSample::YUV444 => 1usize,
+    };
+    // Running start-of-row offsets into each destination plane and into the source.
+    let mut y_offset = 0usize;
+    let mut u_offset = 0usize;
+    let mut v_offset = 0usize;
+    let mut rgba_offset = 0usize;
+
+    for y in 0..height as usize {
+        let mut cx = 0usize;
+        // NOTE(review): the intrinsics come from std::arch::aarch64; the "arm" alternative in this cfg presumably cannot build on 32-bit ARM — confirm.
+        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+        #[cfg(target_feature = "neon")]
+        unsafe {
+            let y_ptr = y_plane.as_mut_ptr();
+            let u_ptr = u_plane.as_mut_ptr();
+            let v_ptr = v_plane.as_mut_ptr();
+            let rgba_ptr = rgba.as_ptr();
+            // Broadcast the biases and fixed-point coefficients into every vector lane.
+            let y_bias = vdupq_n_s32(bias_y);
+            let uv_bias = vdupq_n_s32(bias_uv);
+            let v_yr = vdupq_n_s16(transform.yr as i16);
+            let v_yg = vdupq_n_s16(transform.yg as i16);
+            let v_yb = vdupq_n_s16(transform.yb as i16);
+            let v_cb_r = vdupq_n_s16(transform.cb_r as i16);
+            let v_cb_g = vdupq_n_s16(transform.cb_g as i16);
+            let v_cb_b = vdupq_n_s16(transform.cb_b as i16);
+            let v_cr_r = vdupq_n_s16(transform.cr_r as i16);
+            let v_cr_g = vdupq_n_s16(transform.cr_g as i16);
+            let v_cr_b = vdupq_n_s16(transform.cr_b as i16);
+            let v_zeros = vdupq_n_s32(0i32);
+            // 16 pixels per iteration; with the strict `<`, the last (up to 16) pixels of a row take the scalar loop.
+            while cx + 16 < width as usize {
+                let r_values_u8: uint8x16_t;
+                let g_values_u8: uint8x16_t;
+                let b_values_u8: uint8x16_t;
+                // De-interleave 16 pixels; these raw-pointer loads are not bounds-checked against `rgba`.
+                match source_channels {
+                    YuvSourceChannels::Rgb => {
+                        let rgb_values = vld3q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.0;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.2;
+                    }
+                    YuvSourceChannels::Rgba => {
+                        let rgb_values = vld4q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.0;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.2;
+                    }
+                    YuvSourceChannels::Bgra => {
+                        let rgb_values = vld4q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.2;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.0;
+                    }
+                }
+                // Upper 8 pixels: widen u8 to 16 bits, then multiply-accumulate into i32 lanes on top of the bias.
+                let r_high = vreinterpretq_s16_u16(vmovl_high_u8(r_values_u8));
+                let g_high = vreinterpretq_s16_u16(vmovl_high_u8(g_values_u8));
+                let b_high = vreinterpretq_s16_u16(vmovl_high_u8(b_values_u8));
+
+                let r_h_low = vget_low_s16(r_high);
+                let g_h_low = vget_low_s16(g_high);
+                let b_h_low = vget_low_s16(b_high);
+
+                let mut y_h_high = vmlal_high_s16(y_bias, r_high, v_yr);
+                y_h_high = vmlal_high_s16(y_h_high, g_high, v_yg);
+                y_h_high = vmlal_high_s16(y_h_high, b_high, v_yb);
+                y_h_high = vmaxq_s32(y_h_high, v_zeros);
+
+                let mut y_h_low = vmlal_s16(y_bias, r_h_low, vget_low_s16(v_yr));
+                y_h_low = vmlal_s16(y_h_low, g_h_low, vget_low_s16(v_yg));
+                y_h_low = vmlal_s16(y_h_low, b_h_low, vget_low_s16(v_yb));
+                y_h_low = vmaxq_s32(y_h_low, v_zeros);
+                // `>> 8` drops the fixed-point fraction; the signed-to-unsigned narrowing saturates.
+                let y_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(y_h_low), vqshrun_n_s32::<8>(y_h_high));
+
+                let mut cb_h_high = vmlal_high_s16(uv_bias, r_high, v_cb_r);
+                cb_h_high = vmlal_high_s16(cb_h_high, g_high, v_cb_g);
+                cb_h_high = vmlal_high_s16(cb_h_high, b_high, v_cb_b);
+
+                let mut cb_h_low = vmlal_s16(uv_bias, r_h_low, vget_low_s16(v_cb_r));
+                cb_h_low = vmlal_s16(cb_h_low, g_h_low, vget_low_s16(v_cb_g));
+                cb_h_low = vmlal_s16(cb_h_low, b_h_low, vget_low_s16(v_cb_b));
+
+                let cb_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(cb_h_low), vqshrun_n_s32::<8>(cb_h_high));
+
+                let mut cr_h_high = vmlal_high_s16(uv_bias, r_high, v_cr_r);
+                cr_h_high = vmlal_high_s16(cr_h_high, g_high, v_cr_g);
+                cr_h_high = vmlal_high_s16(cr_h_high, b_high, v_cr_b);
+
+                let mut cr_h_low = vmlal_s16(uv_bias, r_h_low, vget_low_s16(v_cr_r));
+                cr_h_low = vmlal_s16(cr_h_low, g_h_low, vget_low_s16(v_cr_g));
+                cr_h_low = vmlal_s16(cr_h_low, b_h_low, vget_low_s16(v_cr_b));
+
+                let cr_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(cr_h_low), vqshrun_n_s32::<8>(cr_h_high));
+                // Lower 8 pixels: the same arithmetic as for the upper half.
+                let r_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(r_values_u8)));
+                let g_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(g_values_u8)));
+                let b_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b_values_u8)));
+
+                let r_l_low = vget_low_s16(r_low);
+                let g_l_low = vget_low_s16(g_low);
+                let b_l_low = vget_low_s16(b_low);
+
+                let mut y_l_high = vmlal_high_s16(y_bias, r_low, v_yr);
+                y_l_high = vmlal_high_s16(y_l_high, g_low, v_yg);
+                y_l_high = vmlal_high_s16(y_l_high, b_low, v_yb);
+                y_l_high = vmaxq_s32(y_l_high, v_zeros);
+
+                let mut y_l_low = vmlal_s16(y_bias, r_l_low, vget_low_s16(v_yr));
+                y_l_low = vmlal_s16(y_l_low, g_l_low, vget_low_s16(v_yg));
+                y_l_low = vmlal_s16(y_l_low, b_l_low, vget_low_s16(v_yb));
+                y_l_low = vmaxq_s32(y_l_low, v_zeros);
+
+                let y_low = vcombine_u16(vqshrun_n_s32::<8>(y_l_low), vqshrun_n_s32::<8>(y_l_high));
+
+                let mut cb_l_high = vmlal_high_s16(uv_bias, r_low, v_cb_r);
+                cb_l_high = vmlal_high_s16(cb_l_high, g_low, v_cb_g);
+                cb_l_high = vmlal_high_s16(cb_l_high, b_low, v_cb_b);
+
+                let mut cb_l_low = vmlal_s16(uv_bias, r_l_low, vget_low_s16(v_cb_r));
+                cb_l_low = vmlal_s16(cb_l_low, g_l_low, vget_low_s16(v_cb_g));
+                cb_l_low = vmlal_s16(cb_l_low, b_l_low, vget_low_s16(v_cb_b));
+
+                let cb_low =
+                    vcombine_u16(vqshrun_n_s32::<8>(cb_l_low), vqshrun_n_s32::<8>(cb_l_high));
+
+                let mut cr_l_high = vmlal_high_s16(uv_bias, r_low, v_cr_r);
+                cr_l_high = vmlal_high_s16(cr_l_high, g_low, v_cr_g);
+                cr_l_high = vmlal_high_s16(cr_l_high, b_low, v_cr_b);
+
+                let mut cr_l_low = vmlal_s16(uv_bias, r_l_low, vget_low_s16(v_cr_r));
+                cr_l_low = vmlal_s16(cr_l_low, g_l_low, vget_low_s16(v_cr_g));
+                cr_l_low = vmlal_s16(cr_l_low, b_l_low, vget_low_s16(v_cr_b));
+
+                let cr_low =
+                    vcombine_u16(vqshrun_n_s32::<8>(cr_l_low), vqshrun_n_s32::<8>(cr_l_high));
+                // Saturating narrow to u8, joining the lower and upper halves back into 16 samples.
+                let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high));
+                let cb = vcombine_u8(vqmovn_u16(cb_low), vqmovn_u16(cb_high));
+                let cr = vcombine_u8(vqmovn_u16(cr_low), vqmovn_u16(cr_high));
+                vst1q_u8(y_ptr.add(y_offset + cx), y);
+                // Chroma: horizontal pairs are summed and halved for 4:2:0 / 4:2:2, stored as-is for 4:4:4.
+                match chroma_subsampling {
+                    YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                        let cb_s = vshrn_n_u16::<1>(vpaddlq_u8(cb));
+                        let cr_s = vshrn_n_u16::<1>(vpaddlq_u8(cr));
+                        vst1_u8(u_ptr.add(u_offset + cx / 2), cb_s);
+                        vst1_u8(v_ptr.add(v_offset + cx / 2), cr_s);
+                    }
+                    YuvChromaSample::YUV444 => {
+                        vst1q_u8(u_ptr.add(u_offset + cx), cb);
+                        vst1q_u8(v_ptr.add(v_offset + cx), cr);
+                    }
+                }
+
+                cx += 16;
+            }
+        }
+        // Scalar path: the whole row when NEON is not compiled in, otherwise the tail left by the vector loop.
+        for x in (cx..width as usize).step_by(iterator_step) {
+            let px = x * channels;
+            let r = rgba[rgba_offset + px + source_channels.get_r_channel_offset()] as i32;
+            let g = rgba[rgba_offset + px + source_channels.get_g_channel_offset()] as i32;
+            let b = rgba[rgba_offset + px + source_channels.get_b_channel_offset()] as i32;
+            let y_0 = (r * transform.yr + g * transform.yg + b * transform.yb + bias_y) >> 8;
+            let cb = (r * transform.cb_r + g * transform.cb_g + b * transform.cb_b + bias_uv) >> 8;
+            let cr = (r * transform.cr_r + g * transform.cr_g + b * transform.cr_b + bias_uv) >> 8;
+            y_plane[y_offset + x] = y_0.clamp(0, 255) as u8; // saturate, as the NEON path does, rather than wrap
+            let u_pos = match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => u_offset + x / 2,
+                YuvChromaSample::YUV444 => u_offset + x,
+            };
+            u_plane[u_pos] = cb.clamp(0, 255) as u8;
+            let v_pos = match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => v_offset + x / 2,
+                YuvChromaSample::YUV444 => v_offset + x,
+            };
+            v_plane[v_pos] = cr.clamp(0, 255) as u8;
+            match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                    if x + 1 < width as usize {
+                        let next_px = (x + 1) * channels;
+                        let r = rgba[rgba_offset + next_px + source_channels.get_r_channel_offset()]
+                            as i32;
+                        let g = rgba[rgba_offset + next_px + source_channels.get_g_channel_offset()]
+                            as i32;
+                        let b = rgba[rgba_offset + next_px + source_channels.get_b_channel_offset()]
+                            as i32;
+                        let y_1 =
+                            (r * transform.yr + g * transform.yg + b * transform.yb + bias_y) >> 8;
+                        y_plane[y_offset + x + 1] = y_1.clamp(0, 255) as u8;
+                    }
+                }
+                _ => {}
+            }
+        }
+        // Next row. In 4:2:0 both luma rows of a pair write the same chroma row (the later write wins).
+        y_offset += y_stride as usize;
+        rgba_offset += rgba_stride as usize;
+        match chroma_subsampling {
+            YuvChromaSample::YUV420 => {
+                if y & 1 == 1 {
+                    u_offset += u_stride as usize;
+                    v_offset += v_stride as usize;
+                }
+            }
+            YuvChromaSample::YUV444 | YuvChromaSample::YUV422 => {
+                u_offset += u_stride as usize;
+                v_offset += v_stride as usize;
+            }
+        }
+    }
+}
+
+/// Converts an interleaved RGB image into three separate planes in YUV 4:2:2 layout.
+///
+/// Thin wrapper over the shared RGB-family converter, fixed to RGB input and 4:2:2 chroma
+/// sampling: every pixel gets a Y sample, each horizontal pixel pair shares one U and one V.
+///
+/// # Arguments
+///
+/// * `y_plane` - Destination for the Y (luminance) samples.
+/// * `y_stride` - Bytes per row of the Y plane.
+/// * `u_plane` - Destination for the U (chrominance) samples.
+/// * `u_stride` - Bytes per row of the U plane.
+/// * `v_plane` - Destination for the V (chrominance) samples.
+/// * `v_stride` - Bytes per row of the V plane.
+/// * `rgb` - Source RGB pixel data.
+/// * `rgb_stride` - Bytes per row of the source RGB data.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to produce (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics when a plane or the source slice is not valid for the given width, height and
+/// strides, or if invalid YUV range or matrix is provided.
+///
+pub fn rgb_to_yuv422(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgb: &[u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the dispatch below reads at a glance.
+    const SOURCE_LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const CHROMA_LAYOUT: u8 = YuvChromaSample::YUV422 as u8;
+
+    rgbx_to_yuv8::<SOURCE_LAYOUT, CHROMA_LAYOUT>(
+        y_plane, y_stride, // luma destination
+        u_plane, u_stride, // chroma destinations
+        v_plane, v_stride,
+        rgb, rgb_stride, // source pixels
+        width,
+        height,
+        range,
+        matrix,
+    );
+}
+
+/// Converts interleaved RGBA pixels into planar YUV 4:2:2.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Rgba` source layout and `YuvChromaSample::YUV422` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `rgba` - Input slice holding the RGBA image data.
+/// * `rgba_stride` - Row stride of the RGBA image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or input buffer that is
+/// too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn rgba_to_yuv422(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, rgba, rgba_stride, width,
+        height, range, matrix,
+    );
+}
+
+/// Converts interleaved BGRA pixels into planar YUV 4:2:2.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Bgra` source layout and `YuvChromaSample::YUV422` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `bgra` - Input slice holding the BGRA image data.
+/// * `bgra_stride` - Row stride of the BGRA image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or input buffer that is
+/// too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn bgra_to_yuv422(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    bgra: &[u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, bgra, bgra_stride, width,
+        height, range, matrix,
+    );
+}
+
+/// Converts interleaved RGB pixels into planar YUV 4:2:0.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Rgb` source layout and `YuvChromaSample::YUV420` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `rgb` - Input slice holding the RGB image data.
+/// * `rgb_stride` - Row stride of the RGB image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or the RGB input buffer
+/// that is too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn rgb_to_yuv420(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgb: &[u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, rgb, rgb_stride, width, height,
+        range, matrix,
+    );
+}
+
+/// Converts interleaved RGBA pixels into planar YUV 4:2:0.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Rgba` source layout and `YuvChromaSample::YUV420` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `rgba` - Input slice holding the RGBA image data.
+/// * `rgba_stride` - Row stride of the RGBA image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or input buffer that is
+/// too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn rgba_to_yuv420(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, rgba, rgba_stride, width,
+        height, range, matrix,
+    );
+}
+
+/// Converts interleaved BGRA pixels into planar YUV 4:2:0.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Bgra` source layout and `YuvChromaSample::YUV420` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `bgra` - Input slice holding the BGRA image data.
+/// * `bgra_stride` - Row stride of the BGRA image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or input buffer that is
+/// too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn bgra_to_yuv420(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    bgra: &[u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, bgra, bgra_stride, width,
+        height, range, matrix,
+    );
+}
+
+/// Converts interleaved RGB pixels into planar YUV 4:4:4.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Rgb` source layout and `YuvChromaSample::YUV444` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `rgb` - Input slice holding the RGB image data.
+/// * `rgb_stride` - Row stride of the RGB image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or input buffer that is
+/// too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn rgb_to_yuv444(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgb: &[u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV444 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, rgb, rgb_stride, width, height,
+        range, matrix,
+    );
+}
+
+/// Converts interleaved RGBA pixels into planar YUV 4:4:4.
+///
+/// Thin wrapper: forwards all arguments unchanged to `rgbx_to_yuv8`, instantiated with the
+/// `YuvSourceChannels::Rgba` source layout and `YuvChromaSample::YUV444` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - Output slice receiving the Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `u_plane` - Output slice receiving the U (chrominance) plane.
+/// * `u_stride` - Row stride of the U plane, in bytes.
+/// * `v_plane` - Output slice receiving the V (chrominance) plane.
+/// * `v_stride` - Row stride of the V plane, in bytes.
+/// * `rgba` - Input slice holding the RGBA image data.
+/// * `rgba_stride` - Row stride of the RGBA image, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range to encode with (limited or full).
+/// * `matrix` - YUV standard matrix to use (BT.601, BT.709, BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or input buffer that is
+/// too small for the given width, height and strides causes an out-of-bounds panic.
+pub fn rgba_to_yuv444(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV444 as u8;
+    rgbx_to_yuv8::<LAYOUT, SAMPLING>(
+        y_plane, y_stride, u_plane, u_stride, v_plane, v_stride, rgba, rgba_stride, width,
+        height, range, matrix,
+    );
+}
+
+/// Convert BGRA image data to YUV444 planar format.
+///
+/// This function performs BGRA to YUV conversion and stores the result in YUV444 planar format,
+/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components.
+/// It forwards all arguments to `rgbx_to_yuv8`, instantiated with the `YuvSourceChannels::Bgra`
+/// source layout and `YuvChromaSample::YUV444` subsampling.
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `u_plane` - A mutable slice to store the U (chrominance) plane data.
+/// * `u_stride` - The stride (bytes per row) for the U plane.
+/// * `v_plane` - A mutable slice to store the V (chrominance) plane data.
+/// * `v_stride` - The stride (bytes per row) for the V plane.
+/// * `bgra` - The input BGRA image data slice.
+/// * `bgra_stride` - The stride (bytes per row) for the BGRA image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// The shared implementation indexes the slices directly, so a plane or the BGRA input buffer
+/// that is too small for the given width, height and strides causes an out-of-bounds panic.
+///
+pub fn bgra_to_yuv444(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    // Named `bgra*` to match the documentation and the sibling `bgra_to_yuv422`/`bgra_to_yuv420`;
+    // Rust arguments are positional, so existing callers are unaffected.
+    bgra: &[u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_yuv8::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV444 as u8 }>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        bgra,
+        bgra_stride,
+        width,
+        height,
+        range,
+        matrix,
+    );
+}
diff --git a/src/yuv_nv12.rs b/src/yuv_nv12.rs
new file mode 100644
index 0000000..7ac834a
--- /dev/null
+++ b/src/yuv_nv12.rs
@@ -0,0 +1,551 @@
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::{
+    uint8x16x2_t, uint8x16x3_t, uint8x16x4_t, uint8x8_t, uint8x8x2_t, vcombine_u8, vdup_n_u8,
+    vdupq_n_s16, vdupq_n_u8, vget_high_u8, vget_low_u8, vld1q_u8, vld2_u8, vld2q_u8, vmaxq_s16,
+    vmovl_u8, vmull_high_u8, vmull_u8, vmulq_s16, vqaddq_s16, vqshrun_n_s16, vqsubq_u8,
+    vreinterpretq_s16_u16, vst3q_u8, vst4q_u8, vsubq_s16, vsubq_u8, vzip1_u8, vzip2_u8,
+};
+
+use crate::yuv_support::{
+    get_inverse_transform, get_kr_kb, get_yuv_range, YuvChromaSample, YuvNVOrder, YuvRange,
+    YuvSourceChannels, YuvStandardMatrix,
+};
+
+/// Shared 8-bit converter from a semi-planar YUV image (a Y plane plus one interleaved
+/// chroma plane) to interleaved RGB, RGBA or BGRA pixels.
+///
+/// Const parameters (each is the `u8` discriminant of the matching enum, converted back
+/// with `.into()`):
+///
+/// * `UV_ORDER` - `YuvNVOrder`: whether each chroma pair is stored as U,V or as V,U.
+/// * `DESTINATION_CHANNELS` - `YuvSourceChannels`: channel layout of the output pixels.
+/// * `YUV_CHROMA_SAMPLING` - `YuvChromaSample`: subsampling of the chroma plane.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `uv_plane` - Input interleaved chroma plane.
+/// * `uv_stride` - Row stride of the chroma plane, in bytes.
+/// * `bgra` - Output pixel buffer (layout selected by `DESTINATION_CHANNELS`).
+/// * `bgra_stride` - Row stride of the output buffer, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// The arithmetic is fixed-point with 6 fractional bits. When the destination layout has an
+/// alpha channel it is set to 255.
+///
+/// # Panics
+///
+/// The scalar path indexes the slices directly and panics on out-of-bounds access when a
+/// buffer is too small for `width`, `height` and the strides. The NEON path uses raw pointers
+/// and performs no bounds checks of its own.
+fn yuv_nv12_to_rgbx<
+    const UV_ORDER: u8,
+    const DESTINATION_CHANNELS: u8,
+    const YUV_CHROMA_SAMPLING: u8,
+>(
+    y_plane: &[u8],
+    y_stride: u32,
+    uv_plane: &[u8],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    let order: YuvNVOrder = UV_ORDER.into();
+    let range = get_yuv_range(8, range);
+    let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into();
+    let chroma_subsampling: YuvChromaSample = YUV_CHROMA_SAMPLING.into();
+    let channels = destination_channels.get_channels_count();
+    let kr_kb = get_kr_kb(matrix);
+    let transform = get_inverse_transform(255, range.range_y, range.range_uv, kr_kb.kr, kr_kb.kb);
+    // Integer coefficients scaled by 2^6; every result below is shifted right by 6.
+    let i_transform = transform.to_integers(6u32);
+    let cr_coef = i_transform.cr_coef;
+    let cb_coef = i_transform.cb_coef;
+    let y_coef = i_transform.y_coef;
+    let g_coef_1 = i_transform.g_coeff_1;
+    let g_coef_2 = i_transform.g_coeff_2;
+
+    let bias_y = range.bias_y as i32;
+    let bias_uv = range.bias_uv as i32;
+
+    let mut y_offset = 0usize;
+    let mut uv_offset = 0usize;
+    let mut dst_offset = 0usize;
+
+    for y in 0..height as usize {
+        let mut x = 0usize;
+
+        // Vectorised path: 16 pixels per iteration; the scalar loop below finishes the row.
+        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+        #[cfg(target_feature = "neon")]
+        unsafe {
+            let y_ptr = y_plane.as_ptr();
+            let uv_ptr = uv_plane.as_ptr();
+            let bgra_ptr = bgra.as_mut_ptr();
+
+            let y_corr = vdupq_n_u8(bias_y as u8);
+            let uv_corr = vdupq_n_s16(bias_uv as i16);
+            let v_luma_coeff = vdupq_n_u8(y_coef as u8);
+            let v_luma_coeff_8 = vdup_n_u8(y_coef as u8);
+            let v_cr_coeff = vdupq_n_s16(cr_coef as i16);
+            let v_cb_coeff = vdupq_n_s16(cb_coef as i16);
+            let v_min_values = vdupq_n_s16(0i16);
+            let v_g_coeff_1 = vdupq_n_s16(-1i16 * (g_coef_1 as i16));
+            let v_g_coeff_2 = vdupq_n_s16(-1i16 * (g_coef_2 as i16));
+            let v_alpha = vdupq_n_u8(255u8);
+            while x + 16 < width as usize {
+                // Saturating subtract: luma below the bias must become 0 rather than wrap
+                // around to a large value (which would turn near-black into near-white).
+                let y_values = vqsubq_u8(vld1q_u8(y_ptr.add(y_offset + x)), y_corr);
+
+                let u_high_u8: uint8x8_t;
+                let v_high_u8: uint8x8_t;
+                let u_low_u8: uint8x8_t;
+                let v_low_u8: uint8x8_t;
+
+                match chroma_subsampling {
+                    YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                        // 8 chroma pairs cover 16 pixels; zip each lane with itself to
+                        // duplicate every sample for its two pixels.
+                        let mut uv_values = vld2_u8(uv_ptr.add(uv_offset + x));
+                        if order == YuvNVOrder::VU {
+                            uv_values = uint8x8x2_t(uv_values.1, uv_values.0);
+                        }
+
+                        u_high_u8 = vzip2_u8(uv_values.0, uv_values.0);
+                        v_high_u8 = vzip2_u8(uv_values.1, uv_values.1);
+                        u_low_u8 = vzip1_u8(uv_values.0, uv_values.0);
+                        v_low_u8 = vzip1_u8(uv_values.1, uv_values.1);
+                    }
+                    YuvChromaSample::YUV444 => {
+                        // One chroma pair (2 bytes) per pixel.
+                        let mut uv_values = vld2q_u8(uv_ptr.add(uv_offset + x * 2));
+                        if order == YuvNVOrder::VU {
+                            uv_values = uint8x16x2_t(uv_values.1, uv_values.0);
+                        }
+                        u_high_u8 = vget_high_u8(uv_values.0);
+                        v_high_u8 = vget_high_u8(uv_values.1);
+                        u_low_u8 = vget_low_u8(uv_values.0);
+                        v_low_u8 = vget_low_u8(uv_values.1);
+                    }
+                }
+
+                let u_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_high_u8)), uv_corr);
+                let v_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_high_u8)), uv_corr);
+                let y_high = vreinterpretq_s16_u16(vmull_high_u8(y_values, v_luma_coeff));
+
+                let r_high = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_high, vmulq_s16(v_high, v_cr_coeff)),
+                    v_min_values,
+                ));
+                let b_high = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_high, vmulq_s16(u_high, v_cb_coeff)),
+                    v_min_values,
+                ));
+                let g_high = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(
+                        y_high,
+                        vqaddq_s16(
+                            vmulq_s16(v_high, v_g_coeff_1),
+                            vmulq_s16(u_high, v_g_coeff_2),
+                        ),
+                    ),
+                    v_min_values,
+                ));
+
+                let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr);
+                let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr);
+                let y_low = vreinterpretq_s16_u16(vmull_u8(vget_low_u8(y_values), v_luma_coeff_8));
+
+                let r_low = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_low, vmulq_s16(v_low, v_cr_coeff)),
+                    v_min_values,
+                ));
+                let b_low = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_low, vmulq_s16(u_low, v_cb_coeff)),
+                    v_min_values,
+                ));
+                let g_low = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(
+                        y_low,
+                        vqaddq_s16(vmulq_s16(v_low, v_g_coeff_1), vmulq_s16(u_low, v_g_coeff_2)),
+                    ),
+                    v_min_values,
+                ));
+
+                let r_values = vcombine_u8(r_low, r_high);
+                let g_values = vcombine_u8(g_low, g_high);
+                let b_values = vcombine_u8(b_low, b_high);
+
+                let dst_shift = dst_offset + x * channels;
+
+                // The interleaving store writes the tuple fields in order, so the field
+                // order must match the byte order of the destination layout.
+                match destination_channels {
+                    YuvSourceChannels::Rgb => {
+                        let dst_pack: uint8x16x3_t = uint8x16x3_t(r_values, g_values, b_values);
+                        vst3q_u8(bgra_ptr.add(dst_shift), dst_pack);
+                    }
+                    YuvSourceChannels::Rgba => {
+                        let dst_pack: uint8x16x4_t =
+                            uint8x16x4_t(r_values, g_values, b_values, v_alpha);
+                        vst4q_u8(bgra_ptr.add(dst_shift), dst_pack);
+                    }
+                    YuvSourceChannels::Bgra => {
+                        let dst_pack: uint8x16x4_t =
+                            uint8x16x4_t(b_values, g_values, r_values, v_alpha);
+                        vst4q_u8(bgra_ptr.add(dst_shift), dst_pack);
+                    }
+                }
+
+                x += 16;
+            }
+        }
+
+        // Scalar path: whole row without NEON, or the remainder after the vector loop.
+        while x < width as usize {
+            let y_value = (y_plane[y_offset + x] as i32 - bias_y) * y_coef;
+            let cb_value: i32;
+            let cr_value: i32;
+            // Subsampled layouts share one two-byte chroma pair between two horizontally
+            // adjacent pixels; 4:4:4 stores one pair per pixel.
+            let cb_pos = match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => uv_offset + (x / 2) * 2,
+                YuvChromaSample::YUV444 => uv_offset + x * 2,
+            };
+            let cr_pos = cb_pos + 1;
+
+            match order {
+                YuvNVOrder::UV => {
+                    cb_value = uv_plane[cb_pos] as i32 - bias_uv;
+                    cr_value = uv_plane[cr_pos] as i32 - bias_uv;
+                }
+                YuvNVOrder::VU => {
+                    cb_value = uv_plane[cr_pos] as i32 - bias_uv;
+                    cr_value = uv_plane[cb_pos] as i32 - bias_uv;
+                }
+            }
+
+            let r = ((y_value + cr_coef * cr_value) >> 6).clamp(0, 255);
+            let b = ((y_value + cb_coef * cb_value) >> 6).clamp(0, 255);
+            let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6).clamp(0, 255);
+
+            let px = x * channels;
+
+            let dst_shift = dst_offset + px;
+
+            bgra[dst_shift + destination_channels.get_b_channel_offset()] = b as u8;
+            bgra[dst_shift + destination_channels.get_g_channel_offset()] = g as u8;
+            bgra[dst_shift + destination_channels.get_r_channel_offset()] = r as u8;
+            if destination_channels.has_alpha() {
+                bgra[dst_shift + destination_channels.get_a_channel_offset()] = 255;
+            }
+
+            if chroma_subsampling == YuvChromaSample::YUV422
+                || chroma_subsampling == YuvChromaSample::YUV420
+            {
+                // Second pixel of the pair reuses the chroma values loaded above. `x` now
+                // points at that pixel, so both the bounds test and the luma read use `x`.
+                x += 1;
+                if x < width as usize {
+                    let y_value = (y_plane[y_offset + x] as i32 - bias_y) * y_coef;
+
+                    let r = ((y_value + cr_coef * cr_value) >> 6).clamp(0, 255);
+                    let b = ((y_value + cb_coef * cb_value) >> 6).clamp(0, 255);
+                    let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6)
+                        .clamp(0, 255);
+
+                    let next_px = x * channels;
+                    let dst_shift = dst_offset + next_px;
+                    bgra[dst_shift + destination_channels.get_b_channel_offset()] = b as u8;
+                    bgra[dst_shift + destination_channels.get_g_channel_offset()] = g as u8;
+                    bgra[dst_shift + destination_channels.get_r_channel_offset()] = r as u8;
+                    if destination_channels.has_alpha() {
+                        bgra[dst_shift + destination_channels.get_a_channel_offset()] = 255;
+                    }
+                }
+            }
+
+            x += 1;
+        }
+
+        // 4:2:0 shares one chroma row between two image rows; the other layouts advance
+        // the chroma plane on every row.
+        match chroma_subsampling {
+            YuvChromaSample::YUV420 => {
+                if y & 1 == 1 {
+                    uv_offset += uv_stride as usize;
+                }
+            }
+            YuvChromaSample::YUV444 | YuvChromaSample::YUV422 => {
+                uv_offset += uv_stride as usize;
+            }
+        }
+
+        dst_offset += bgra_stride as usize;
+        y_offset += y_stride as usize;
+    }
+}
+
+/// Converts an 8-bit NV12 image (Y plane plus interleaved UV plane, 4:2:0) to BGRA.
+///
+/// Thin wrapper: forwards all arguments unchanged to `yuv_nv12_to_rgbx`, instantiated with
+/// `YuvNVOrder::UV`, `YuvSourceChannels::Bgra` and `YuvChromaSample::YUV420`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `uv_plane` - Input interleaved UV (chrominance) plane.
+/// * `uv_stride` - Row stride of the UV plane, in bytes.
+/// * `bgra` - Output slice receiving the BGRA pixels.
+/// * `bgra_stride` - Row stride of the BGRA output, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// # Panics
+///
+/// The scalar path of the shared implementation indexes the slices directly and panics on
+/// out-of-bounds access when a buffer is too small for the width, height and strides.
+pub fn yuv_nv12_to_bgra(
+    y_plane: &[u8],
+    y_stride: u32,
+    uv_plane: &[u8],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    yuv_nv12_to_rgbx::<ORDER, LAYOUT, SAMPLING>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    )
+}
+
+/// Converts an 8-bit NV21 image (Y plane plus interleaved VU plane, 4:2:0) to BGRA.
+///
+/// Thin wrapper: forwards all arguments unchanged to `yuv_nv12_to_rgbx`, instantiated with
+/// `YuvNVOrder::VU`, `YuvSourceChannels::Bgra` and `YuvChromaSample::YUV420`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `vu_plane` - Input interleaved VU (chrominance) plane.
+/// * `vu_stride` - Row stride of the VU plane, in bytes.
+/// * `bgra` - Output slice receiving the BGRA pixels.
+/// * `bgra_stride` - Row stride of the BGRA output, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// # Panics
+///
+/// The scalar path of the shared implementation indexes the slices directly and panics on
+/// out-of-bounds access when a buffer is too small for the width, height and strides.
+pub fn yuv_nv21_to_bgra(
+    y_plane: &[u8],
+    y_stride: u32,
+    vu_plane: &[u8],
+    vu_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const ORDER: u8 = YuvNVOrder::VU as u8;
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    yuv_nv12_to_rgbx::<ORDER, LAYOUT, SAMPLING>(
+        y_plane, y_stride, vu_plane, vu_stride, bgra, bgra_stride, width, height, range, matrix,
+    )
+}
+
+/// Converts an 8-bit NV12 image (Y plane plus interleaved UV plane, 4:2:0) to RGBA.
+///
+/// Thin wrapper: forwards all arguments unchanged to `yuv_nv12_to_rgbx`, instantiated with
+/// `YuvNVOrder::UV`, `YuvSourceChannels::Rgba` and `YuvChromaSample::YUV420`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `uv_plane` - Input interleaved UV (chrominance) plane.
+/// * `uv_stride` - Row stride of the UV plane, in bytes.
+/// * `rgba` - Output slice receiving the RGBA pixels.
+/// * `rgba_stride` - Row stride of the RGBA output, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// # Panics
+///
+/// The scalar path of the shared implementation indexes the slices directly and panics on
+/// out-of-bounds access when a buffer is too small for the width, height and strides.
+pub fn yuv_nv12_to_rgba(
+    y_plane: &[u8],
+    y_stride: u32,
+    uv_plane: &[u8],
+    uv_stride: u32,
+    rgba: &mut [u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    yuv_nv12_to_rgbx::<ORDER, LAYOUT, SAMPLING>(
+        y_plane, y_stride, uv_plane, uv_stride, rgba, rgba_stride, width, height, range, matrix,
+    )
+}
+
+/// Converts an 8-bit NV21 image (Y plane plus interleaved VU plane, 4:2:0) to RGBA.
+///
+/// Thin wrapper: forwards all arguments unchanged to `yuv_nv12_to_rgbx`, instantiated with
+/// `YuvNVOrder::VU`, `YuvSourceChannels::Rgba` and `YuvChromaSample::YUV420`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `vu_plane` - Input interleaved VU (chrominance) plane.
+/// * `vu_stride` - Row stride of the VU plane, in bytes.
+/// * `rgba` - Output slice receiving the RGBA pixels.
+/// * `rgba_stride` - Row stride of the RGBA output, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// # Panics
+///
+/// The scalar path of the shared implementation indexes the slices directly and panics on
+/// out-of-bounds access when a buffer is too small for the width, height and strides.
+pub fn yuv_nv21_to_rgba(
+    y_plane: &[u8],
+    y_stride: u32,
+    vu_plane: &[u8],
+    vu_stride: u32,
+    rgba: &mut [u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const ORDER: u8 = YuvNVOrder::VU as u8;
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    yuv_nv12_to_rgbx::<ORDER, LAYOUT, SAMPLING>(
+        y_plane, y_stride, vu_plane, vu_stride, rgba, rgba_stride, width, height, range, matrix,
+    )
+}
+
+/// Converts an 8-bit NV12 image (Y plane plus interleaved UV plane, 4:2:0) to RGB.
+///
+/// Thin wrapper: forwards all arguments unchanged to `yuv_nv12_to_rgbx`, instantiated with
+/// `YuvNVOrder::UV`, `YuvSourceChannels::Rgb` and `YuvChromaSample::YUV420`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `uv_plane` - Input interleaved UV (chrominance) plane.
+/// * `uv_stride` - Row stride of the UV plane, in bytes.
+/// * `rgb` - Output slice receiving the RGB pixels.
+/// * `rgb_stride` - Row stride of the RGB output, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// # Panics
+///
+/// The scalar path of the shared implementation indexes the slices directly and panics on
+/// out-of-bounds access when a buffer is too small for the width, height and strides.
+pub fn yuv_nv12_to_rgb(
+    y_plane: &[u8],
+    y_stride: u32,
+    uv_plane: &[u8],
+    uv_stride: u32,
+    rgb: &mut [u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    yuv_nv12_to_rgbx::<ORDER, LAYOUT, SAMPLING>(
+        y_plane, y_stride, uv_plane, uv_stride, rgb, rgb_stride, width, height, range, matrix,
+    )
+}
+
+/// Converts an 8-bit NV21 image (Y plane plus interleaved VU plane, 4:2:0) to RGB.
+///
+/// Thin wrapper: forwards all arguments unchanged to `yuv_nv12_to_rgbx`, instantiated with
+/// `YuvNVOrder::VU`, `YuvSourceChannels::Rgb` and `YuvChromaSample::YUV420`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Input Y (luminance) plane.
+/// * `y_stride` - Row stride of the Y plane, in bytes.
+/// * `vu_plane` - Input interleaved VU (chrominance) plane.
+/// * `vu_stride` - Row stride of the VU plane, in bytes.
+/// * `rgb` - Output slice receiving the RGB pixels.
+/// * `rgb_stride` - Row stride of the RGB output, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV range of the input.
+/// * `matrix` - YUV standard matrix of the input.
+///
+/// # Panics
+///
+/// The scalar path of the shared implementation indexes the slices directly and panics on
+/// out-of-bounds access when a buffer is too small for the width, height and strides.
+pub fn yuv_nv21_to_rgb(
+    y_plane: &[u8],
+    y_stride: u32,
+    vu_plane: &[u8],
+    vu_stride: u32,
+    rgb: &mut [u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    // Name the const-generic selectors so the call site reads as plain data flow.
+    const ORDER: u8 = YuvNVOrder::VU as u8;
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    yuv_nv12_to_rgbx::<ORDER, LAYOUT, SAMPLING>(
+        y_plane, y_stride, vu_plane, vu_stride, rgb, rgb_stride, width, height, range, matrix,
+    )
+}
+
diff --git a/src/yuv_nv12_p10.rs b/src/yuv_nv12_p10.rs
new file mode 100644
index 0000000..34ed931
--- /dev/null
+++ b/src/yuv_nv12_p10.rs
@@ -0,0 +1,628 @@
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::{
+    int16x4_t, int16x8_t, uint16x4x2_t, uint8x8x4_t, vcombine_s16, vdup_n_s16, vdup_n_u8,
+    vdupq_n_s16, vget_low_s16, vld1q_u16, vld2_u16, vmaxq_s16, vmlal_s16, vmull_high_s16,
+    vmull_s16, vreinterpret_s16_u16, vreinterpret_u16_u8, vreinterpret_u8_u16,
+    vreinterpretq_s16_u16, vqshrun_n_s16, vreinterpretq_u16_u8, vreinterpretq_u8_u16,
+    vrev16_u8, vrev16q_u8, vshr_n_u16, vshrn_n_s32, vshrq_n_u16, vst4_u8, vsub_s16, vsubq_s16,
+    vzip1_s16, vzip2_s16, uint8x8x3_t, vst3_u8,
+};
+use std::slice;
+
+use crate::yuv_support::{
+    get_inverse_transform, get_kr_kb, get_yuv_range, YuvBytesPosition, YuvChromaSample, YuvEndian,
+    YuvNVOrder, YuvRange, YuvSourceChannels, YuvStandardMatrix,
+};
+
+/// Shared kernel that converts 10-bit bi-planar YUV (NV12/NV16 style layout)
+/// into 8-bit RGB, RGBA or BGRA.
+///
+/// Every const parameter is the `u8` discriminant of a `yuv_support` enum:
+///
+/// * `DESTINATION_CHANNELS` - `YuvSourceChannels`, layout of the produced pixels.
+/// * `NV_ORDER` - `YuvNVOrder`, order of the two samples in each chroma pair.
+/// * `SAMPLING` - `YuvChromaSample`; `YUV420` advances the chroma row after
+///   every second luma row, the other values advance it after every row.
+/// * `ENDIANNESS` - `YuvEndian`, byte order of the stored 16-bit samples.
+/// * `BYTES_POSITION` - `YuvBytesPosition`; with `MostSignificantBytes` every
+///   sample is shifted right by 6 to obtain the 10-bit value.
+///
+/// Chroma is always read as one pair per two horizontally adjacent luma
+/// samples.
+///
+/// # Arguments
+///
+/// * `y_plane` - Luma samples, one `u16` per pixel.
+/// * `y_stride` - Distance between luma rows, in bytes.
+/// * `uv_plane` - Interleaved chroma samples, one `u16` each.
+/// * `uv_stride` - Distance between chroma rows, in bytes.
+/// * `bgra` - Destination buffer, 8 bits per channel.
+/// * `bgra_stride` - Distance between destination rows, in bytes.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - Value range of the source samples.
+/// * `matrix` - Matrix coefficients used for the conversion.
+///
+/// Row offsets are applied as byte offsets to `u16` data, so `y_stride` and
+/// `uv_stride` have to be even to keep every row aligned.
+///
+/// # Panics
+///
+/// Panics if a source or destination row required by `width`, `height` and
+/// the strides does not fit into the corresponding buffer.
+fn yuv_nv12_p10_to_bgra_impl<
+    const DESTINATION_CHANNELS: u8,
+    const NV_ORDER: u8,
+    const SAMPLING: u8,
+    const ENDIANNESS: u8,
+    const BYTES_POSITION: u8,
+>(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into();
+    let channels = destination_channels.get_channels_count();
+    let uv_order: YuvNVOrder = NV_ORDER.into();
+    let chroma_subsampling: YuvChromaSample = SAMPLING.into();
+    let endianness: YuvEndian = ENDIANNESS.into();
+    let bytes_position: YuvBytesPosition = BYTES_POSITION.into();
+    let range = get_yuv_range(10, range);
+    let kr_kb = get_kr_kb(matrix);
+    let max_range_p10 = (2f32.powi(10) - 1f32) as u32;
+    let transform = get_inverse_transform(
+        max_range_p10,
+        range.range_y,
+        range.range_uv,
+        kr_kb.kr,
+        kr_kb.kb,
+    );
+    // Coefficients are fixed-point numbers with 6 fractional bits.
+    let i_transform = transform.to_integers(6u32);
+    let cr_coef = i_transform.cr_coef;
+    let cb_coef = i_transform.cb_coef;
+    let y_coef = i_transform.y_coef;
+    let g_coef_1 = i_transform.g_coeff_1;
+    let g_coef_2 = i_transform.g_coeff_2;
+
+    let bias_y = range.bias_y as i32;
+    let bias_uv = range.bias_uv as i32;
+
+    let row_width = width as usize;
+    if row_width == 0 {
+        // Nothing to convert; also keeps the row checks below from firing on empty images.
+        return;
+    }
+    // `u16` chroma samples touched per row: one pair per two luma samples, rounded up.
+    let uv_row_len = (row_width + 1) / 2 * 2;
+
+    // Byte offsets of the current row inside each buffer.
+    let mut y_offset = 0usize;
+    let mut uv_offset = 0usize;
+    let mut dst_offset = 0usize;
+
+    let y_src_ptr = y_plane.as_ptr() as *const u8;
+    let uv_src_ptr = uv_plane.as_ptr() as *const u8;
+
+    // Decodes one stored sample into its 10-bit value.
+    let read_sample = move |raw: u16| -> i32 {
+        let native = match endianness {
+            YuvEndian::BigEndian => u16::from_be(raw),
+            YuvEndian::LittleEndian => u16::from_le(raw),
+        };
+        if bytes_position == YuvBytesPosition::MostSignificantBytes {
+            (native >> 6) as i32
+        } else {
+            native as i32
+        }
+    };
+
+    for y in 0..height as usize {
+        // The rows are accessed through raw pointers below, so validate them up front.
+        assert!(
+            y_offset + row_width * 2 <= y_plane.len() * 2,
+            "Y plane is too small for the given width, height and stride"
+        );
+        assert!(
+            uv_offset + uv_row_len * 2 <= uv_plane.len() * 2,
+            "UV plane is too small for the given width, height and stride"
+        );
+        assert!(
+            dst_offset + row_width * channels <= bgra.len(),
+            "destination buffer is too small for the given width, height and stride"
+        );
+
+        let mut x = 0usize;
+
+        let y_ld_ptr = unsafe { y_src_ptr.offset(y_offset as isize) as *const u16 };
+        let y_ld = unsafe { slice::from_raw_parts(y_ld_ptr, row_width) };
+        let uv_ld_ptr = unsafe { uv_src_ptr.offset(uv_offset as isize) as *const u16 };
+        let uv_ld = unsafe { slice::from_raw_parts(uv_ld_ptr, uv_row_len) };
+
+        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+        #[cfg(target_feature = "neon")]
+        unsafe {
+            let dst_ptr = bgra.as_mut_ptr();
+
+            let y_corr = vdupq_n_s16(bias_y as i16);
+            let uv_corr = vdup_n_s16(bias_uv as i16);
+            let v_luma_coeff = vdupq_n_s16(y_coef as i16);
+            let v_luma_coeff_4 = vdup_n_s16(y_coef as i16);
+            let v_cr_coeff = vdup_n_s16(cr_coef as i16);
+            let v_cb_coeff = vdup_n_s16(cb_coef as i16);
+            let v_min_values = vdupq_n_s16(0i16);
+            let v_g_coeff_1 = vdup_n_s16(-1i16 * (g_coef_1 as i16));
+            let v_g_coeff_2 = vdup_n_s16(-1i16 * (g_coef_2 as i16));
+            let v_alpha = vdup_n_u8(255u8);
+
+            // Eight pixels per iteration; the scalar loop below finishes the row.
+            while x + 8 < row_width {
+                let y_values: int16x8_t;
+
+                let u_values_c: int16x4_t;
+                let v_values_c: int16x4_t;
+
+                match endianness {
+                    YuvEndian::BigEndian => {
+                        let mut y_u_values = vreinterpretq_u16_u8(vrev16q_u8(
+                            vreinterpretq_u8_u16(vld1q_u16(y_ld_ptr.add(x))),
+                        ));
+                        if bytes_position == YuvBytesPosition::MostSignificantBytes {
+                            y_u_values = vshrq_n_u16::<6>(y_u_values);
+                        }
+                        y_values = vsubq_s16(vreinterpretq_s16_u16(y_u_values), y_corr);
+
+                        let mut uv_values_u = vld2_u16(uv_ld_ptr.add(x));
+
+                        if uv_order == YuvNVOrder::VU {
+                            uv_values_u = uint16x4x2_t(uv_values_u.1, uv_values_u.0);
+                        }
+
+                        let mut u_v =
+                            vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(uv_values_u.0)));
+                        let mut v_v =
+                            vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(uv_values_u.1)));
+                        if bytes_position == YuvBytesPosition::MostSignificantBytes {
+                            u_v = vshr_n_u16::<6>(u_v);
+                            v_v = vshr_n_u16::<6>(v_v);
+                        }
+                        u_values_c = vsub_s16(vreinterpret_s16_u16(u_v), uv_corr);
+                        v_values_c = vsub_s16(vreinterpret_s16_u16(v_v), uv_corr);
+                    }
+                    YuvEndian::LittleEndian => {
+                        let mut y_vl = vld1q_u16(y_ld_ptr.add(x));
+                        if bytes_position == YuvBytesPosition::MostSignificantBytes {
+                            y_vl = vshrq_n_u16::<6>(y_vl);
+                        }
+                        y_values = vsubq_s16(vreinterpretq_s16_u16(y_vl), y_corr);
+
+                        let mut uv_values_u = vld2_u16(uv_ld_ptr.add(x));
+
+                        if uv_order == YuvNVOrder::VU {
+                            uv_values_u = uint16x4x2_t(uv_values_u.1, uv_values_u.0);
+                        }
+
+                        let mut u_vl = uv_values_u.0;
+                        let mut v_vl = uv_values_u.1;
+                        if bytes_position == YuvBytesPosition::MostSignificantBytes {
+                            u_vl = vshr_n_u16::<6>(u_vl);
+                            v_vl = vshr_n_u16::<6>(v_vl);
+                        }
+                        u_values_c = vsub_s16(vreinterpret_s16_u16(u_vl), uv_corr);
+                        v_values_c = vsub_s16(vreinterpret_s16_u16(v_vl), uv_corr);
+                    }
+                }
+
+                // Duplicate every chroma sample so it lines up with two luma samples.
+                let u_high = vzip2_s16(u_values_c, u_values_c);
+                let v_high = vzip2_s16(v_values_c, v_values_c);
+
+                let y_high = vmull_high_s16(y_values, v_luma_coeff);
+
+                let r_high = vshrn_n_s32::<6>(vmlal_s16(y_high, v_high, v_cr_coeff));
+                let b_high = vshrn_n_s32::<6>(vmlal_s16(y_high, u_high, v_cb_coeff));
+                let g_high = vshrn_n_s32::<6>(vmlal_s16(
+                    vmlal_s16(y_high, v_high, v_g_coeff_1),
+                    u_high,
+                    v_g_coeff_2,
+                ));
+
+                let y_low = vmull_s16(vget_low_s16(y_values), v_luma_coeff_4);
+                let u_low = vzip1_s16(u_values_c, u_values_c);
+                let v_low = vzip1_s16(v_values_c, v_values_c);
+
+                let r_low = vshrn_n_s32::<6>(vmlal_s16(y_low, v_low, v_cr_coeff));
+                let b_low = vshrn_n_s32::<6>(vmlal_s16(y_low, u_low, v_cb_coeff));
+                let g_low = vshrn_n_s32::<6>(vmlal_s16(
+                    vmlal_s16(y_low, v_low, v_g_coeff_1),
+                    u_low,
+                    v_g_coeff_2,
+                ));
+
+                // Clamp negatives to 0, then narrow 10 -> 8 bit with unsigned saturation.
+                let r_values = vqshrun_n_s16::<2>(vmaxq_s16(
+                    vcombine_s16(r_low, r_high),
+                    v_min_values,
+                ));
+                let g_values = vqshrun_n_s16::<2>(vmaxq_s16(
+                    vcombine_s16(g_low, g_high),
+                    v_min_values,
+                ));
+                let b_values = vqshrun_n_s16::<2>(vmaxq_s16(
+                    vcombine_s16(b_low, b_high),
+                    v_min_values,
+                ));
+
+                match destination_channels {
+                    YuvSourceChannels::Rgb => {
+                        let dst_pack: uint8x8x3_t = uint8x8x3_t(r_values, g_values, b_values);
+                        vst3_u8(dst_ptr.add(dst_offset + x * channels), dst_pack);
+                    }
+                    YuvSourceChannels::Rgba => {
+                        let dst_pack: uint8x8x4_t =
+                            uint8x8x4_t(r_values, g_values, b_values, v_alpha);
+                        vst4_u8(dst_ptr.add(dst_offset + x * channels), dst_pack);
+                    }
+                    YuvSourceChannels::Bgra => {
+                        let dst_pack: uint8x8x4_t =
+                            uint8x8x4_t(b_values, g_values, r_values, v_alpha);
+                        vst4_u8(dst_ptr.add(dst_offset + x * channels), dst_pack);
+                    }
+                }
+
+                x += 8;
+            }
+        }
+
+        // Scalar path: one chroma pair drives up to two horizontally adjacent pixels.
+        while x < row_width {
+            let first = read_sample(uv_ld[x]);
+            let second = read_sample(uv_ld[x + 1]);
+            let (cb_value, cr_value) = match uv_order {
+                YuvNVOrder::UV => (first - bias_uv, second - bias_uv),
+                YuvNVOrder::VU => (second - bias_uv, first - bias_uv),
+            };
+
+            // On odd widths the last pair only has a single luma sample.
+            let pair_end = (x + 2).min(row_width);
+            for px in x..pair_end {
+                let y_value = (read_sample(y_ld[px]) - bias_y) * y_coef;
+
+                // `>> 8` drops the 6 fixed-point bits plus 2 bits for 10 -> 8 bit.
+                let r_raw = (y_value + cr_coef * cr_value) >> 8;
+                let b_raw = (y_value + cb_coef * cb_value) >> 8;
+                let g_raw = (y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 8;
+
+                let r = r_raw.clamp(0, 255);
+                let b = b_raw.clamp(0, 255);
+                let g = g_raw.clamp(0, 255);
+
+                let rgb_offset = dst_offset + px * channels;
+
+                bgra[rgb_offset + destination_channels.get_b_channel_offset()] = b as u8;
+                bgra[rgb_offset + destination_channels.get_g_channel_offset()] = g as u8;
+                bgra[rgb_offset + destination_channels.get_r_channel_offset()] = r as u8;
+                if destination_channels.has_alpha() {
+                    bgra[rgb_offset + destination_channels.get_a_channel_offset()] = 255;
+                }
+            }
+
+            x += 2;
+        }
+
+        match chroma_subsampling {
+            YuvChromaSample::YUV420 => {
+                // Two luma rows share one chroma row.
+                if y & 1 == 1 {
+                    uv_offset += uv_stride as usize;
+                }
+            }
+            YuvChromaSample::YUV422 | YuvChromaSample::YUV444 => {
+                uv_offset += uv_stride as usize;
+            }
+        }
+
+        dst_offset += bgra_stride as usize;
+        y_offset += y_stride as usize;
+    }
+}
+
+/// Convert 10-bit YUV NV12 (4:2:0, interleaved UV plane) to 8-bit BGRA.
+///
+/// Source samples are 16-bit little-endian words carrying the 10-bit value in
+/// their low bits. The conversion itself is done by
+/// `yuv_nv12_p10_to_bgra_impl`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `uv_plane` - Source interleaved UV (chrominance) plane.
+/// * `uv_stride` - Stride (bytes per row) of the UV plane.
+/// * `bgra` - Destination buffer receiving the BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV value range of the source.
+/// * `matrix` - YUV standard matrix of the source.
+///
+/// # Panics
+///
+/// Panic behaviour is inherited from `yuv_nv12_p10_to_bgra_impl`.
+///
+pub fn yuv_nv12_p10_to_bgra(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const CHANNELS: u8 = YuvSourceChannels::Bgra as u8;
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    const ENDIAN: u8 = YuvEndian::LittleEndian as u8;
+    const POSITION: u8 = YuvBytesPosition::LeastSignificantBytes as u8;
+
+    yuv_nv12_p10_to_bgra_impl::<CHANNELS, ORDER, SAMPLING, ENDIAN, POSITION>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    );
+}
+
+/// Convert 10-bit YUV NV16 (4:2:2, interleaved UV plane) to 8-bit BGRA.
+///
+/// Source samples are 16-bit little-endian words carrying the 10-bit value in
+/// their low bits. The conversion itself is done by
+/// `yuv_nv12_p10_to_bgra_impl`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `uv_plane` - Source interleaved UV (chrominance) plane.
+/// * `uv_stride` - Stride (bytes per row) of the UV plane.
+/// * `bgra` - Destination buffer receiving the BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV value range of the source.
+/// * `matrix` - YUV standard matrix of the source.
+///
+/// # Panics
+///
+/// Panic behaviour is inherited from `yuv_nv12_p10_to_bgra_impl`.
+///
+pub fn yuv_nv16_p10_to_bgra(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const CHANNELS: u8 = YuvSourceChannels::Bgra as u8;
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+    const ENDIAN: u8 = YuvEndian::LittleEndian as u8;
+    const POSITION: u8 = YuvBytesPosition::LeastSignificantBytes as u8;
+
+    yuv_nv12_p10_to_bgra_impl::<CHANNELS, ORDER, SAMPLING, ENDIAN, POSITION>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    );
+}
+
+/// Convert big-endian 10-bit YUV NV12 (4:2:0, interleaved UV plane) to 8-bit BGRA.
+///
+/// Source samples are 16-bit big-endian words carrying the 10-bit value in
+/// their low bits. The conversion itself is done by
+/// `yuv_nv12_p10_to_bgra_impl`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `uv_plane` - Source interleaved UV (chrominance) plane.
+/// * `uv_stride` - Stride (bytes per row) of the UV plane.
+/// * `bgra` - Destination buffer receiving the BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV value range of the source.
+/// * `matrix` - YUV standard matrix of the source.
+///
+/// # Panics
+///
+/// Panic behaviour is inherited from `yuv_nv12_p10_to_bgra_impl`.
+///
+pub fn yuv_nv12_p10_to_bgra_be(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const CHANNELS: u8 = YuvSourceChannels::Bgra as u8;
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    const ENDIAN: u8 = YuvEndian::BigEndian as u8;
+    const POSITION: u8 = YuvBytesPosition::LeastSignificantBytes as u8;
+
+    yuv_nv12_p10_to_bgra_impl::<CHANNELS, ORDER, SAMPLING, ENDIAN, POSITION>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    );
+}
+
+/// Convert big-endian 10-bit YUV NV16 (4:2:2, interleaved UV plane) to 8-bit BGRA.
+///
+/// Source samples are 16-bit big-endian words carrying the 10-bit value in
+/// their low bits. The conversion itself is done by
+/// `yuv_nv12_p10_to_bgra_impl`.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `uv_plane` - Source interleaved UV (chrominance) plane.
+/// * `uv_stride` - Stride (bytes per row) of the UV plane.
+/// * `bgra` - Destination buffer receiving the BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV value range of the source.
+/// * `matrix` - YUV standard matrix of the source.
+///
+/// # Panics
+///
+/// Panic behaviour is inherited from `yuv_nv12_p10_to_bgra_impl`.
+///
+pub fn yuv_nv16_p10_to_bgra_be(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const CHANNELS: u8 = YuvSourceChannels::Bgra as u8;
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+    const ENDIAN: u8 = YuvEndian::BigEndian as u8;
+    const POSITION: u8 = YuvBytesPosition::LeastSignificantBytes as u8;
+
+    yuv_nv12_p10_to_bgra_impl::<CHANNELS, ORDER, SAMPLING, ENDIAN, POSITION>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    );
+}
+
+/// Convert MSB-aligned 10-bit YUV NV12 (4:2:0, interleaved UV plane) to 8-bit BGRA.
+///
+/// Source samples are 16-bit little-endian words whose 10-bit value sits in
+/// the upper bits; `yuv_nv12_p10_to_bgra_impl`, which performs the
+/// conversion, shifts every sample right by 6 before using it.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `uv_plane` - Source interleaved UV (chrominance) plane.
+/// * `uv_stride` - Stride (bytes per row) of the UV plane.
+/// * `bgra` - Destination buffer receiving the BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV value range of the source.
+/// * `matrix` - YUV standard matrix of the source.
+///
+/// # Panics
+///
+/// Panic behaviour is inherited from `yuv_nv12_p10_to_bgra_impl`.
+///
+pub fn yuv_nv12_p10_msb_to_bgra(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const CHANNELS: u8 = YuvSourceChannels::Bgra as u8;
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+    const ENDIAN: u8 = YuvEndian::LittleEndian as u8;
+    const POSITION: u8 = YuvBytesPosition::MostSignificantBytes as u8;
+
+    yuv_nv12_p10_to_bgra_impl::<CHANNELS, ORDER, SAMPLING, ENDIAN, POSITION>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    );
+}
+
+/// Convert MSB-aligned 10-bit YUV NV16 (4:2:2, interleaved UV plane) to 8-bit BGRA.
+///
+/// Source samples are 16-bit little-endian words whose 10-bit value sits in
+/// the upper bits; `yuv_nv12_p10_to_bgra_impl`, which performs the
+/// conversion, shifts every sample right by 6 before using it.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `uv_plane` - Source interleaved UV (chrominance) plane.
+/// * `uv_stride` - Stride (bytes per row) of the UV plane.
+/// * `bgra` - Destination buffer receiving the BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Image width in pixels.
+/// * `height` - Image height in pixels.
+/// * `range` - YUV value range of the source.
+/// * `matrix` - YUV standard matrix of the source.
+///
+/// # Panics
+///
+/// Panic behaviour is inherited from `yuv_nv12_p10_to_bgra_impl`.
+///
+pub fn yuv_nv16_p10_msb_to_bgra(
+    y_plane: &[u16],
+    y_stride: u32,
+    uv_plane: &[u16],
+    uv_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const CHANNELS: u8 = YuvSourceChannels::Bgra as u8;
+    const ORDER: u8 = YuvNVOrder::UV as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+    const ENDIAN: u8 = YuvEndian::LittleEndian as u8;
+    const POSITION: u8 = YuvBytesPosition::MostSignificantBytes as u8;
+
+    yuv_nv12_p10_to_bgra_impl::<CHANNELS, ORDER, SAMPLING, ENDIAN, POSITION>(
+        y_plane, y_stride, uv_plane, uv_stride, bgra, bgra_stride, width, height, range, matrix,
+    );
+}
\ No newline at end of file
diff --git a/src/yuv_support.rs b/src/yuv_support.rs
new file mode 100644
index 0000000..7bd59e9
--- /dev/null
+++ b/src/yuv_support.rs
@@ -0,0 +1,363 @@
+/// Coefficients of the inverse (YCbCr -> RGB) colour transform.
+///
+/// `T` is `f32` for the values produced by `get_inverse_transform` and `i32`
+/// for the fixed-point form produced by `to_integers`.
+#[derive(Copy, Clone)]
+pub struct CbCrInverseTransform<T> {
+    /// Scale applied to the bias-corrected luma sample.
+    pub y_coef: T,
+    /// Weight of Cr added to luma to form the red channel.
+    pub cr_coef: T,
+    /// Weight of Cb added to luma to form the blue channel.
+    pub cb_coef: T,
+    /// Weight of Cr subtracted from luma to form the green channel.
+    pub g_coeff_1: T,
+    /// Weight of Cb subtracted from luma to form the green channel.
+    pub g_coeff_2: T,
+}
+
+impl<T> CbCrInverseTransform<T> {
+    /// Bundle the five inverse-transform coefficients into one value.
+    pub fn new(
+        y_coef: T,
+        cr_coef: T,
+        cb_coef: T,
+        g_coeff_1: T,
+        g_coeff_2: T,
+    ) -> CbCrInverseTransform<T> {
+        Self { y_coef, cr_coef, cb_coef, g_coeff_1, g_coeff_2 }
+    }
+}
+
+impl CbCrInverseTransform<f32> {
+    /// Convert the coefficients to fixed point with `precision` fractional bits.
+    ///
+    /// Every coefficient is multiplied by `2^precision` and rounded to the
+    /// nearest integer.
+    pub fn to_integers(&self, precision: u32) -> CbCrInverseTransform<i32> {
+        let scale = (1i32 << (precision as i32)) as f32;
+        let quantize = |coefficient: f32| (coefficient * scale).round() as i32;
+        CbCrInverseTransform::<i32> {
+            y_coef: quantize(self.y_coef),
+            cr_coef: quantize(self.cr_coef),
+            cb_coef: quantize(self.cb_coef),
+            g_coeff_1: quantize(self.g_coeff_1),
+            g_coeff_2: quantize(self.g_coeff_2),
+        }
+    }
+}
+
+/// Build the floating-point YCbCr -> RGB coefficients for the given ranges
+/// and luma weights.
+///
+/// # Arguments
+///
+/// * `range_bgra` - Maximum value of the produced RGB channels.
+/// * `range_y` - Number of codes spanned by luma.
+/// * `range_uv` - Number of codes spanned by chroma.
+/// * `kr` - Red luma weight.
+/// * `kb` - Blue luma weight.
+///
+/// # Panics
+///
+/// Panics if `1 - kr - kb` (the green luma weight) is zero, because the green
+/// coefficients divide by it.
+pub fn get_inverse_transform(
+    range_bgra: u32,
+    range_y: u32,
+    range_uv: u32,
+    kr: f32,
+    kb: f32,
+) -> CbCrInverseTransform<f32> {
+    let kg = 1.0f32 - kr - kb;
+    if kg == 0f32 {
+        panic!("1.0f - kr - kb must not be 0");
+    }
+    // Ratio that rescales chroma codes to the output range.
+    let uv_scale = range_bgra as f32 / range_uv as f32;
+    let y_coef = range_bgra as f32 / range_y as f32;
+    let cr_coeff = (2f32 * (1f32 - kr)) * uv_scale;
+    let cb_coeff = (2f32 * (1f32 - kb)) * uv_scale;
+    let g_coeff_1 = (2f32 * ((1f32 - kr) * kr / kg)) * uv_scale;
+    let g_coeff_2 = (2f32 * ((1f32 - kb) * kb / kg)) * uv_scale;
+    CbCrInverseTransform::new(y_coef, cr_coeff, cb_coeff, g_coeff_1, g_coeff_2)
+}
+
+/// Coefficients of the forward transform as produced by
+/// `get_forward_transform` (`f32`) or its `to_integers` fixed-point form (`i32`).
+///
+/// NOTE(review): the field names suggest `<output>_<input>` weights (for
+/// example `cb_r` = contribution of R to Cb); confirm against the encoder
+/// that consumes them.
+#[repr(C)]
+#[derive(Copy, Clone, PartialOrd, PartialEq)]
+pub struct CbCrForwardTransform<T> {
+    /// Luma coefficient derived from `kr`.
+    pub yr: T,
+    /// Luma coefficient derived from `kg = 1 - kr - kb`.
+    pub yg: T,
+    /// Luma coefficient derived from `kb`.
+    pub yb: T,
+    /// Cb coefficient derived from `kr`.
+    pub cb_r: T,
+    /// Cb coefficient derived from `kg`.
+    pub cb_g: T,
+    /// Cb coefficient with the fixed 0.5 weight.
+    pub cb_b: T,
+    /// Cr coefficient with the fixed 0.5 weight.
+    pub cr_r: T,
+    /// Cr coefficient derived from `kg`.
+    pub cr_g: T,
+    /// Cr coefficient derived from `kb`.
+    pub cr_b: T,
+}
+
+/// Conversion of a floating-point forward transform into fixed point.
+pub trait ToIntegerTransform {
+    /// Return the transform with every coefficient scaled by `2^precision`
+    /// and stored as `i32`.
+    fn to_integers(&self, precision: u32) -> CbCrForwardTransform<i32>;
+}
+
+impl ToIntegerTransform for CbCrForwardTransform<f32> {
+    /// Scale every coefficient by `2^precision` and round it to the nearest
+    /// integer.
+    fn to_integers(&self, precision: u32) -> CbCrForwardTransform<i32> {
+        let scale = (1 << precision) as f32;
+        let quantize = |weight: f32| (weight * scale).round() as i32;
+        CbCrForwardTransform::<i32> {
+            yr: quantize(self.yr),
+            yg: quantize(self.yg),
+            yb: quantize(self.yb),
+            cb_r: quantize(self.cb_r),
+            cb_g: quantize(self.cb_g),
+            cb_b: quantize(self.cb_b),
+            cr_r: quantize(self.cr_r),
+            cr_g: quantize(self.cr_g),
+            cr_b: quantize(self.cr_b),
+        }
+    }
+}
+
+/// Build the floating-point forward-transform coefficients for the given
+/// ranges and luma weights.
+///
+/// # Arguments
+///
+/// * `range_bgra` - Maximum value of the RGB channels.
+/// * `range_y` - Number of codes spanned by luma.
+/// * `range_uv` - Number of codes spanned by chroma.
+/// * `kr` - Red luma weight.
+/// * `kb` - Blue luma weight.
+///
+/// # Panics
+///
+/// Panics if `1 - kr - kb` (the green luma weight) is zero.
+pub fn get_forward_transform(
+    range_bgra: u32,
+    range_y: u32,
+    range_uv: u32,
+    kr: f32,
+    kb: f32,
+) -> CbCrForwardTransform<f32> {
+    let kg = 1.0f32 - kr - kb;
+    if kg == 0f32 {
+        panic!("1.0f - kr - kb must not be 0");
+    }
+
+    // Luma row: the luma weights rescaled from the RGB range to the luma range.
+    let yr = kr * range_y as f32 / range_bgra as f32;
+    let yg = kg * range_y as f32 / range_bgra as f32;
+    let yb = kb * range_y as f32 / range_bgra as f32;
+
+    // Cb row, rescaled from the RGB range to the chroma range.
+    let cb_r = -0.5f32 * kr / (1f32 - kb) * range_uv as f32 / range_bgra as f32;
+    let cb_g = -0.5f32 * kg / (1f32 - kb) * range_uv as f32 / range_bgra as f32;
+    let cb_b = 0.5f32 * range_uv as f32 / range_bgra as f32;
+
+    // Cr row, rescaled from the RGB range to the chroma range.
+    let cr_r = 0.5f32 * range_uv as f32 / range_bgra as f32;
+    let cr_g = -0.5f32 * kg / (1f32 - kr) * range_uv as f32 / range_bgra as f32;
+    let cr_b = -0.5f32 * kb / (1f32 - kr) * range_uv as f32 / range_bgra as f32;
+
+    CbCrForwardTransform {
+        yr,
+        yg,
+        yb,
+        cb_r,
+        cb_g,
+        cb_b,
+        cr_r,
+        cr_g,
+        cr_b,
+    }
+}
+
+/// Value range of the YUV samples; `get_yuv_range` maps it to concrete numbers.
+#[repr(C)]
+#[derive(Copy, Clone, PartialOrd, PartialEq)]
+pub enum YuvRange {
+    /// Limited range: at 8 bits luma has a bias of 16 and spans 219 codes,
+    /// chroma spans 224 codes; all values are scaled up for higher bit depths.
+    TV,
+    /// Full range: no luma bias, luma and chroma both span `2^depth - 1` codes.
+    Full,
+}
+
+/// Concrete biases and spans for one bit depth and `YuvRange`, as computed by
+/// `get_yuv_range`.
+#[derive(Copy, Clone, PartialOrd, PartialEq)]
+pub struct YuvChromaRange {
+    /// Offset of the luma samples (0 for full range).
+    pub bias_y: u32,
+    /// Offset of the chroma samples, `2^(depth - 1)`.
+    pub bias_uv: u32,
+    /// Number of codes spanned by luma.
+    pub range_y: u32,
+    /// Number of codes spanned by chroma.
+    pub range_uv: u32,
+    /// The range this description was derived from.
+    pub range: YuvRange,
+}
+
+/// Compute the sample biases and spans for `depth`-bit YUV data in `range`.
+///
+/// The limited-range numbers are the 8-bit constants (16, 219, 224) shifted
+/// left by `depth - 8`; the full-range span is `2^depth - 1`. The chroma bias
+/// is `2^(depth - 1)` in both cases.
+pub fn get_yuv_range(depth: u32, range: YuvRange) -> YuvChromaRange {
+    match range {
+        YuvRange::TV => {
+            // Extra bits beyond the 8-bit reference constants.
+            let extra_bits = depth - 8;
+            YuvChromaRange {
+                bias_y: 16 << extra_bits,
+                bias_uv: 1 << (depth - 1),
+                range_y: 219 << extra_bits,
+                range_uv: 224 << extra_bits,
+                range,
+            }
+        }
+        YuvRange::Full => {
+            let bias_uv: u32 = 1 << (depth - 1);
+            let max_value = 2f32.powi(depth as i32) as u32 - 1;
+            YuvChromaRange {
+                bias_y: 0,
+                bias_uv,
+                range_y: max_value,
+                range_uv: max_value,
+                range,
+            }
+        }
+    }
+}
+
+/// Selects the luma weights (`kr`, `kb`) returned by `get_kr_kb`.
+#[repr(C)]
+#[derive(Copy, Clone, PartialOrd, PartialEq)]
+pub enum YuvStandardMatrix {
+    /// BT.601 luma weights.
+    Bt601,
+    /// BT.709 luma weights.
+    Bt709,
+    /// BT.2020 luma weights.
+    Bt2020,
+    /// SMPTE 240M luma weights.
+    Smpte240,
+}
+
+/// Pair of luma weights describing a YUV matrix; the green weight is implied
+/// as `1 - kr - kb`.
+#[derive(Copy, Clone)]
+pub struct YuvBias {
+    /// Red luma weight.
+    pub kr: f32,
+    /// Blue luma weight.
+    pub kb: f32,
+}
+
+/// Return the red (`kr`) and blue (`kb`) luma weights of the given matrix.
+///
+/// The green weight is implied as `1 - kr - kb`.
+pub fn get_kr_kb(matrix: YuvStandardMatrix) -> YuvBias {
+    match matrix {
+        YuvStandardMatrix::Bt601 => YuvBias {
+            kr: 0.299f32,
+            kb: 0.114f32,
+        },
+        YuvStandardMatrix::Bt709 => YuvBias {
+            kr: 0.2126f32,
+            kb: 0.0722f32,
+        },
+        YuvStandardMatrix::Bt2020 => YuvBias {
+            kr: 0.2627f32,
+            kb: 0.0593f32,
+        },
+        // SMPTE 240M luma is 0.212 R + 0.701 G + 0.087 B.
+        YuvStandardMatrix::Smpte240 => YuvBias {
+            kr: 0.212f32,
+            kb: 0.087f32,
+        },
+    }
+}
+
+/// Order of the two samples inside each interleaved chroma pair.
+///
+/// The discriminants are passed around as `u8` const generic arguments.
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum YuvNVOrder {
+    /// U (Cb) is stored first, V (Cr) second.
+    UV = 0,
+    /// V (Cr) is stored first, U (Cb) second.
+    VU = 1,
+}
+
+impl From<u8> for YuvNVOrder {
+    /// Decode the `u8` discriminant (0 = `UV`, 1 = `VU`); panics on any other value.
+    #[inline(always)]
+    fn from(value: u8) -> Self {
+        if value == 0 {
+            Self::UV
+        } else if value == 1 {
+            Self::VU
+        } else {
+            panic!("Unknown value")
+        }
+    }
+}
+
+/// Chroma subsampling scheme of a YUV image.
+///
+/// The discriminants are passed around as `u8` const generic arguments.
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum YuvChromaSample {
+    /// 4:2:0 subsampling.
+    YUV420 = 0,
+    /// 4:2:2 subsampling.
+    YUV422 = 1,
+    /// 4:4:4, no subsampling.
+    YUV444 = 2,
+}
+
+impl From<u8> for YuvChromaSample {
+    /// Decode the `u8` discriminant (0 = `YUV420`, 1 = `YUV422`, 2 = `YUV444`);
+    /// panics on any other value.
+    #[inline(always)]
+    fn from(value: u8) -> Self {
+        if value == 0 {
+            Self::YUV420
+        } else if value == 1 {
+            Self::YUV422
+        } else if value == 2 {
+            Self::YUV444
+        } else {
+            panic!("Unknown value")
+        }
+    }
+}
+
+/// Byte order of 16-bit YUV samples as stored in memory.
+///
+/// The discriminants are passed around as `u8` const generic arguments.
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum YuvEndian {
+    /// Samples are stored big-endian.
+    BigEndian = 0,
+    /// Samples are stored little-endian.
+    LittleEndian = 1,
+}
+
+impl From<u8> for YuvEndian {
+    /// Decode the `u8` discriminant (0 = `BigEndian`, 1 = `LittleEndian`);
+    /// panics on any other value.
+    #[inline(always)]
+    fn from(value: u8) -> Self {
+        if value == 0 {
+            Self::BigEndian
+        } else if value == 1 {
+            Self::LittleEndian
+        } else {
+            panic!("Unknown value")
+        }
+    }
+}
+
+/// Where the significant bits of a sample sit inside its 16-bit word.
+///
+/// The discriminants are passed around as `u8` const generic arguments.
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum YuvBytesPosition {
+    /// The value occupies the upper bits; the 10-bit converter shifts such
+    /// samples right by 6 before use.
+    MostSignificantBytes = 0,
+    /// The value occupies the lower bits and is used as stored.
+    LeastSignificantBytes = 1,
+}
+
+impl From<u8> for YuvBytesPosition {
+    /// Decode the `u8` discriminant (0 = `MostSignificantBytes`,
+    /// 1 = `LeastSignificantBytes`); panics on any other value.
+    #[inline(always)]
+    fn from(value: u8) -> Self {
+        if value == 0 {
+            Self::MostSignificantBytes
+        } else if value == 1 {
+            Self::LeastSignificantBytes
+        } else {
+            panic!("Unknown value")
+        }
+    }
+}
+
+/// Channel layout of a packed RGB-family pixel buffer.
+///
+/// The discriminants are passed around as `u8` const generic arguments.
+#[repr(u8)]
+#[derive(Copy, Clone, PartialEq, Eq)]
+pub enum YuvSourceChannels {
+    /// Three channels per pixel in R, G, B order.
+    Rgb = 0,
+    /// Four channels per pixel in R, G, B, A order.
+    Rgba = 1,
+    /// Four channels per pixel in B, G, R, A order.
+    Bgra = 2,
+}
+
+impl From<u8> for YuvSourceChannels {
+    /// Decode the `u8` discriminant (0 = `Rgb`, 1 = `Rgba`, 2 = `Bgra`);
+    /// panics on any other value.
+    #[inline(always)]
+    fn from(value: u8) -> Self {
+        if value == 0 {
+            Self::Rgb
+        } else if value == 1 {
+            Self::Rgba
+        } else if value == 2 {
+            Self::Bgra
+        } else {
+            panic!("Unknown value")
+        }
+    }
+}
+
+impl YuvSourceChannels {
+    /// Number of bytes (channels) each pixel occupies in this layout.
+    #[inline(always)]
+    pub fn get_channels_count(&self) -> usize {
+        match *self {
+            Self::Rgb => 3,
+            Self::Rgba | Self::Bgra => 4,
+        }
+    }
+
+    /// Whether this layout carries an alpha channel.
+    #[inline(always)]
+    pub fn has_alpha(&self) -> bool {
+        matches!(*self, Self::Rgba | Self::Bgra)
+    }
+}
+
+impl YuvSourceChannels {
+    /// Index of the red channel inside one pixel.
+    #[inline(always)]
+    pub fn get_r_channel_offset(&self) -> usize {
+        match *self {
+            Self::Rgb | Self::Rgba => 0,
+            Self::Bgra => 2,
+        }
+    }
+
+    /// Index of the green channel inside one pixel (1 for every layout).
+    #[inline(always)]
+    pub fn get_g_channel_offset(&self) -> usize {
+        match *self {
+            Self::Rgb | Self::Rgba | Self::Bgra => 1,
+        }
+    }
+
+    /// Index of the blue channel inside one pixel.
+    #[inline(always)]
+    pub fn get_b_channel_offset(&self) -> usize {
+        match *self {
+            Self::Rgb | Self::Rgba => 2,
+            Self::Bgra => 0,
+        }
+    }
+
+    /// Index of the alpha channel inside one pixel; `Rgb` has no alpha and
+    /// reports 0.
+    #[inline(always)]
+    pub fn get_a_channel_offset(&self) -> usize {
+        match *self {
+            Self::Rgb => 0,
+            Self::Rgba | Self::Bgra => 3,
+        }
+    }
+}
diff --git a/src/yuv_to_rgba.rs b/src/yuv_to_rgba.rs
new file mode 100644
index 0000000..c8950ae
--- /dev/null
+++ b/src/yuv_to_rgba.rs
@@ -0,0 +1,751 @@
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::{
+    uint8x16x3_t, uint8x16x4_t, uint8x8_t, vcombine_u8, vdup_n_u8, vdupq_n_s16, vdupq_n_u8,
+    vget_high_u8, vget_low_u8, vld1_u8, vld1q_u8, vmaxq_s16, vmovl_u8, vmull_high_u8, vmull_u8,
+    vmulq_s16, vqaddq_s16, vqshrun_n_s16, vqsubq_u8, vreinterpretq_s16_u16, vst3q_u8, vst4q_u8,
+    vsubq_s16, vsubq_u8, vzip1_u8, vzip2_u8,
+};
+
+use crate::yuv_support::{
+    get_inverse_transform, get_kr_kb, get_yuv_range, YuvChromaSample, YuvRange, YuvSourceChannels,
+    YuvStandardMatrix,
+};
+
+/// Converts 8-bit planar YUV into interleaved 8-bit RGB, RGBA or BGRA.
+///
+/// This is the shared implementation behind the public `yuv4xx_to_*` wrappers in this file.
+/// `DESTINATION_CHANNELS` is a `YuvSourceChannels` discriminant selecting the output layout and
+/// `SAMPLING` is a `YuvChromaSample` discriminant selecting the chroma subsampling of the input;
+/// both are turned back into their enums with `.into()`.
+///
+/// The arithmetic is fixed point with 6 fractional bits. For 4:2:0 and 4:2:2 input one chroma
+/// sample is shared by two horizontally adjacent pixels; for 4:2:0 one chroma row is also shared
+/// by two image rows. When the layout has an alpha component it is always written as 255.
+///
+/// # Arguments
+///
+/// * `y_plane`, `u_plane`, `v_plane` - Source planes.
+/// * `y_stride`, `u_stride`, `v_stride` - Distance in bytes between consecutive rows of each plane.
+/// * `rgba` - Destination buffer for the interleaved pixels.
+/// * `rgba_stride` - Distance in bytes between consecutive destination rows.
+/// * `width`, `height` - Image size in pixels.
+/// * `range` - YUV range used to derive the luma/chroma bias and scaling.
+/// * `matrix` - YUV standard matrix used to derive the conversion coefficients.
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination buffer is too short for `width`, `height` and
+/// the given strides. The check happens before any pixel is touched because the NEON path
+/// reads and writes through raw pointers.
+fn yuv_to_rgbx<const DESTINATION_CHANNELS: u8, const SAMPLING: u8>(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgba: &mut [u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    let chroma_subsampling: YuvChromaSample = SAMPLING.into();
+    let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into();
+    let channels = destination_channels.get_channels_count();
+    let range = get_yuv_range(8, range);
+    let kr_kb = get_kr_kb(matrix);
+    let transform = get_inverse_transform(255, range.range_y, range.range_uv, kr_kb.kr, kr_kb.kb);
+    // Coefficients are kept as fixed point with 6 fractional bits; every result below is
+    // shifted right by 6 to get back to the 8-bit domain.
+    let precision_scale: i32 = 1i32 << 6i32;
+    let cr_coef = (transform.cr_coef * precision_scale as f32).round() as i32;
+    let cb_coef = (transform.cb_coef * precision_scale as f32).round() as i32;
+    let y_coef = (transform.y_coef * precision_scale as f32).round() as i32;
+    let g_coef_1 = (transform.g_coeff_1 * precision_scale as f32).round() as i32;
+    let g_coef_2 = (transform.g_coeff_2 * precision_scale as f32).round() as i32;
+
+    let bias_y = range.bias_y as i32;
+    let bias_uv = range.bias_uv as i32;
+
+    let row_width = width as usize;
+    let row_count = height as usize;
+    if row_width == 0 || row_count == 0 {
+        // Nothing to convert; this also keeps the size arithmetic below free of underflow.
+        return;
+    }
+
+    // Number of chroma samples per row and number of chroma rows that the loops below address.
+    let (chroma_width, chroma_rows) = match chroma_subsampling {
+        YuvChromaSample::YUV420 => ((row_width + 1) / 2, (row_count + 1) / 2),
+        YuvChromaSample::YUV422 => ((row_width + 1) / 2, row_count),
+        YuvChromaSample::YUV444 => (row_width, row_count),
+    };
+
+    // Each bound is "offset of the last row + elements used in that row", i.e. exactly one past
+    // the largest index the conversion touches. Strides smaller than a row remain accepted.
+    assert!(
+        y_plane.len() >= (row_count - 1) * y_stride as usize + row_width,
+        "Y plane is too small for the given width, height and stride"
+    );
+    assert!(
+        u_plane.len() >= (chroma_rows - 1) * u_stride as usize + chroma_width,
+        "U plane is too small for the given width, height and stride"
+    );
+    assert!(
+        v_plane.len() >= (chroma_rows - 1) * v_stride as usize + chroma_width,
+        "V plane is too small for the given width, height and stride"
+    );
+    assert!(
+        rgba.len() >= (row_count - 1) * rgba_stride as usize + row_width * channels,
+        "Destination buffer is too small for the given width, height and stride"
+    );
+
+    let r_channel = destination_channels.get_r_channel_offset();
+    let g_channel = destination_channels.get_g_channel_offset();
+    let b_channel = destination_channels.get_b_channel_offset();
+    let a_channel = destination_channels.get_a_channel_offset();
+    let has_alpha = destination_channels.has_alpha();
+
+    let mut y_offset = 0usize;
+    let mut u_offset = 0usize;
+    let mut v_offset = 0usize;
+    let mut rgba_offset = 0usize;
+
+    // Number of pixels that share one chroma sample horizontally.
+    let iterator_step = match chroma_subsampling {
+        YuvChromaSample::YUV420 => 2usize,
+        YuvChromaSample::YUV422 => 2usize,
+        YuvChromaSample::YUV444 => 1usize,
+    };
+
+    for y in 0..row_count {
+        // Only advanced by the NEON path; the scalar loop below picks up from here.
+        #[allow(unused_mut)]
+        let mut cx = 0usize;
+
+        // NOTE(review): the intrinsics used here are imported from `std::arch::aarch64`;
+        // confirm that the `target_arch = "arm"` half of this cfg is actually buildable.
+        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+        #[cfg(target_feature = "neon")]
+        unsafe {
+            let y_ptr = y_plane.as_ptr();
+            let u_ptr = u_plane.as_ptr();
+            let v_ptr = v_plane.as_ptr();
+            let rgba_ptr = rgba.as_mut_ptr();
+
+            let y_corr = vdupq_n_u8(bias_y as u8);
+            let uv_corr = vdupq_n_s16(bias_uv as i16);
+            let v_luma_coeff = vdupq_n_u8(y_coef as u8);
+            let v_luma_coeff_8 = vdup_n_u8(y_coef as u8);
+            let v_cr_coeff = vdupq_n_s16(cr_coef as i16);
+            let v_cb_coeff = vdupq_n_s16(cb_coef as i16);
+            let v_min_values = vdupq_n_s16(0i16);
+            let v_g_coeff_1 = vdupq_n_s16(-1i16 * g_coef_1 as i16);
+            let v_g_coeff_2 = vdupq_n_s16(-1i16 * g_coef_2 as i16);
+            let v_alpha = vdupq_n_u8(255u8);
+
+            // Index of the next chroma sample in the current chroma row.
+            let mut uv_x = 0usize;
+
+            // 16 pixels per iteration; the strict `<` always leaves a tail for the scalar loop.
+            while cx + 16 < row_width {
+                // Saturating subtract: luma below the bias must clamp to 0, not wrap around
+                // to a large unsigned value.
+                let y_values = vqsubq_u8(vld1q_u8(y_ptr.add(y_offset + cx)), y_corr);
+
+                let u_high_u8: uint8x8_t;
+                let v_high_u8: uint8x8_t;
+                let u_low_u8: uint8x8_t;
+                let v_low_u8: uint8x8_t;
+
+                match chroma_subsampling {
+                    YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                        // 8 chroma samples cover 16 pixels; zipping a vector with itself
+                        // duplicates every sample for its two pixels.
+                        let u_values = vld1_u8(u_ptr.add(u_offset + uv_x));
+                        let v_values = vld1_u8(v_ptr.add(v_offset + uv_x));
+
+                        u_high_u8 = vzip2_u8(u_values, u_values);
+                        v_high_u8 = vzip2_u8(v_values, v_values);
+                        u_low_u8 = vzip1_u8(u_values, u_values);
+                        v_low_u8 = vzip1_u8(v_values, v_values);
+                    }
+                    YuvChromaSample::YUV444 => {
+                        let u_values = vld1q_u8(u_ptr.add(u_offset + uv_x));
+                        let v_values = vld1q_u8(v_ptr.add(v_offset + uv_x));
+
+                        u_high_u8 = vget_high_u8(u_values);
+                        v_high_u8 = vget_high_u8(v_values);
+                        u_low_u8 = vget_low_u8(u_values);
+                        v_low_u8 = vget_low_u8(v_values);
+                    }
+                }
+
+                let u_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_high_u8)), uv_corr);
+                let v_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_high_u8)), uv_corr);
+                let y_high = vreinterpretq_s16_u16(vmull_high_u8(y_values, v_luma_coeff));
+
+                let r_high = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_high, vmulq_s16(v_high, v_cr_coeff)),
+                    v_min_values,
+                ));
+                let b_high = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_high, vmulq_s16(u_high, v_cb_coeff)),
+                    v_min_values,
+                ));
+                let g_high = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(
+                        y_high,
+                        vqaddq_s16(
+                            vmulq_s16(v_high, v_g_coeff_1),
+                            vmulq_s16(u_high, v_g_coeff_2),
+                        ),
+                    ),
+                    v_min_values,
+                ));
+
+                let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr);
+                let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr);
+                let y_low = vreinterpretq_s16_u16(vmull_u8(vget_low_u8(y_values), v_luma_coeff_8));
+
+                let r_low = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_low, vmulq_s16(v_low, v_cr_coeff)),
+                    v_min_values,
+                ));
+                let b_low = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(y_low, vmulq_s16(u_low, v_cb_coeff)),
+                    v_min_values,
+                ));
+                let g_low = vqshrun_n_s16::<6>(vmaxq_s16(
+                    vqaddq_s16(
+                        y_low,
+                        vqaddq_s16(vmulq_s16(v_low, v_g_coeff_1), vmulq_s16(u_low, v_g_coeff_2)),
+                    ),
+                    v_min_values,
+                ));
+
+                let r_values = vcombine_u8(r_low, r_high);
+                let g_values = vcombine_u8(g_low, g_high);
+                let b_values = vcombine_u8(b_low, b_high);
+
+                let dst_shift = rgba_offset + cx * channels;
+
+                // The interleaving stores write tuple element N to byte N of every pixel, so
+                // the tuple order must match the channel offsets used by the scalar path.
+                match destination_channels {
+                    YuvSourceChannels::Rgb => {
+                        let dst_pack: uint8x16x3_t = uint8x16x3_t(r_values, g_values, b_values);
+                        vst3q_u8(rgba_ptr.add(dst_shift), dst_pack);
+                    }
+                    YuvSourceChannels::Rgba => {
+                        let dst_pack: uint8x16x4_t =
+                            uint8x16x4_t(r_values, g_values, b_values, v_alpha);
+                        vst4q_u8(rgba_ptr.add(dst_shift), dst_pack);
+                    }
+                    YuvSourceChannels::Bgra => {
+                        let dst_pack: uint8x16x4_t =
+                            uint8x16x4_t(b_values, g_values, r_values, v_alpha);
+                        vst4q_u8(rgba_ptr.add(dst_shift), dst_pack);
+                    }
+                }
+
+                cx += 16;
+
+                match chroma_subsampling {
+                    YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                        uv_x += 8;
+                    }
+                    YuvChromaSample::YUV444 => {
+                        uv_x += 16;
+                    }
+                }
+            }
+        }
+
+        // Scalar path: the whole row without NEON, otherwise the tail left by the NEON loop.
+        for x in (cx..row_width).step_by(iterator_step) {
+            let chroma_x = match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => x / 2,
+                YuvChromaSample::YUV444 => x,
+            };
+
+            let cb_value = u_plane[u_offset + chroma_x] as i32 - bias_uv;
+            let cr_value = v_plane[v_offset + chroma_x] as i32 - bias_uv;
+
+            // Every pixel sharing this chroma sample: one for 4:4:4, otherwise two, except
+            // for the last column of an odd-width row.
+            let group_end = (x + iterator_step).min(row_width);
+            for px in x..group_end {
+                let y_value = (y_plane[y_offset + px] as i32 - bias_y) * y_coef;
+
+                let r = ((y_value + cr_coef * cr_value) >> 6).clamp(0, 255);
+                let b = ((y_value + cb_coef * cb_value) >> 6).clamp(0, 255);
+                let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6)
+                    .clamp(0, 255);
+
+                let rgba_shift = rgba_offset + px * channels;
+
+                rgba[rgba_shift + r_channel] = r as u8;
+                rgba[rgba_shift + g_channel] = g as u8;
+                rgba[rgba_shift + b_channel] = b as u8;
+                if has_alpha {
+                    rgba[rgba_shift + a_channel] = 255;
+                }
+            }
+        }
+
+        y_offset += y_stride as usize;
+        rgba_offset += rgba_stride as usize;
+        match chroma_subsampling {
+            YuvChromaSample::YUV420 => {
+                // One chroma row serves two image rows: advance only after the odd one.
+                if y & 1 == 1 {
+                    u_offset += u_stride as usize;
+                    v_offset += v_stride as usize;
+                }
+            }
+            YuvChromaSample::YUV444 | YuvChromaSample::YUV422 => {
+                u_offset += u_stride as usize;
+                v_offset += v_stride as usize;
+            }
+        }
+    }
+}
+
+/// Converts planar 8-bit YUV 4:2:0 into interleaved 8-bit RGB.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the RGB layout and 4:2:0
+/// sampling (chroma subsampled by two both horizontally and vertically).
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `rgb` - Destination buffer that receives the converted RGB pixels.
+/// * `rgb_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination RGB buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv420_to_rgb(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgb: &mut [u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgb,
+        rgb_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:2:0 into interleaved 8-bit RGBA.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the RGBA layout and 4:2:0
+/// sampling (chroma subsampled by two both horizontally and vertically). The alpha component
+/// of every pixel is written as 255.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `rgba` - Destination buffer that receives the converted RGBA pixels.
+/// * `rgba_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination RGBA buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv420_to_rgba(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgba: &mut [u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgba,
+        rgba_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:2:0 into interleaved 8-bit BGRA.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the BGRA layout and 4:2:0
+/// sampling (chroma subsampled by two both horizontally and vertically). The alpha component
+/// of every pixel is written as 255.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `bgra` - Destination buffer that receives the converted BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination BGRA buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv420_to_bgra(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV420 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        bgra,
+        bgra_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:2:2 into interleaved 8-bit RGB.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the RGB layout and 4:2:2
+/// sampling (chroma subsampled by two horizontally, one chroma row per image row).
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `rgb` - Destination buffer that receives the converted RGB pixels.
+/// * `rgb_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination RGB buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv422_to_rgb(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgb: &mut [u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgb,
+        rgb_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:2:2 into interleaved 8-bit RGBA.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the RGBA layout and 4:2:2
+/// sampling (chroma subsampled by two horizontally, one chroma row per image row). The alpha
+/// component of every pixel is written as 255.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `rgba` - Destination buffer that receives the converted RGBA pixels.
+/// * `rgba_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination RGBA buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv422_to_rgba(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgba: &mut [u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgba,
+        rgba_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:2:2 into interleaved 8-bit BGRA.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the BGRA layout and 4:2:2
+/// sampling (chroma subsampled by two horizontally, one chroma row per image row). The alpha
+/// component of every pixel is written as 255.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `bgra` - Destination buffer that receives the converted BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination BGRA buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv422_to_bgra(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV422 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        bgra,
+        bgra_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:4:4 into interleaved 8-bit RGBA.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the RGBA layout and 4:4:4
+/// sampling (one chroma sample per pixel). The alpha component of every pixel is written
+/// as 255.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `rgba` - Destination buffer that receives the converted RGBA pixels.
+/// * `rgba_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination RGBA buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv444_to_rgba(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgba: &mut [u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Rgba as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV444 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgba,
+        rgba_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:4:4 into interleaved 8-bit BGRA.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the BGRA layout and 4:4:4
+/// sampling (one chroma sample per pixel). The alpha component of every pixel is written
+/// as 255.
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `bgra` - Destination buffer that receives the converted BGRA pixels.
+/// * `bgra_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination BGRA buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv444_to_bgra(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    bgra: &mut [u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Bgra as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV444 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        bgra,
+        bgra_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}
+
+/// Converts planar 8-bit YUV 4:4:4 into interleaved 8-bit RGB.
+///
+/// Forwards to the shared `yuv_to_rgbx` routine, instantiated for the RGB layout and 4:4:4
+/// sampling (one chroma sample per pixel).
+///
+/// # Arguments
+///
+/// * `y_plane` - Source Y (luminance) plane.
+/// * `y_stride` - Stride (bytes per row) of the Y plane.
+/// * `u_plane` - Source U (chrominance) plane.
+/// * `u_stride` - Stride (bytes per row) of the U plane.
+/// * `v_plane` - Source V (chrominance) plane.
+/// * `v_stride` - Stride (bytes per row) of the V plane.
+/// * `rgb` - Destination buffer that receives the converted RGB pixels.
+/// * `rgb_stride` - Stride (bytes per row) of the destination buffer.
+/// * `width` - Width of the image in pixels.
+/// * `height` - Height of the image in pixels.
+/// * `range` - YUV range of the source (limited or full).
+/// * `matrix` - YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// Panics if a source plane or the destination RGB buffer is too short for the given width,
+/// height and strides.
+///
+pub fn yuv444_to_rgb(
+    y_plane: &[u8],
+    y_stride: u32,
+    u_plane: &[u8],
+    u_stride: u32,
+    v_plane: &[u8],
+    v_stride: u32,
+    rgb: &mut [u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    const LAYOUT: u8 = YuvSourceChannels::Rgb as u8;
+    const SAMPLING: u8 = YuvChromaSample::YUV444 as u8;
+
+    yuv_to_rgbx::<LAYOUT, SAMPLING>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgb,
+        rgb_stride,
+        width,
+        height,
+        range,
+        matrix,
+    )
+}