diff --git a/.github/workflows/build_push.yml b/.github/workflows/build_push.yml
new file mode 100644
index 0000000..e351c42
--- /dev/null
+++ b/.github/workflows/build_push.yml
@@ -0,0 +1,28 @@
+name: "Build"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+on:
+  push:
+    branches:
+      - '*'
+      - '!ci_test_*'
+    tags-ignore:
+      - '*'
+  pull_request:
+    branches:
+      - '*'
+      - '!ci_test_*'
+
+jobs:
+  build:
+    name: Build
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - run: cargo build --all-features
+      - name: Test release pipeline
+        run: cargo publish --dry-run --manifest-path Cargo.toml # the crate manifest lives at the repository root
\ No newline at end of file
diff --git a/.github/workflows/publish_release.yml b/.github/workflows/publish_release.yml
new file mode 100644
index 0000000..48a7444
--- /dev/null
+++ b/.github/workflows/publish_release.yml
@@ -0,0 +1,35 @@
+name: Create Release
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+on:
+  push:
+    tags:
+      - '*'
+
+jobs:
+  build_and_publish:
+    name: Build
+    runs-on: ubuntu-latest
+    environment: Cargo
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions-rust-lang/setup-rust-toolchain@v1
+      - name: Make a release
+        env:
+          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_TOKEN }}
+        run: cargo publish --manifest-path Cargo.toml # the crate manifest lives at the repository root
+
+  release:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+    needs:
+      - build_and_publish
+    steps:
+      - uses: actions/checkout@v4 # same major version as the other jobs
+      - uses: ncipollo/release-action@v1
+        with:
+          bodyFile: 'CHANGELOG.md'
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a0038a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+.idea
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..4d03924
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1 @@
+Added conversions
\ No newline at 
end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..2fb71b2 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "yuvutils-rs" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..8c25a30 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "yuvutils-rs" +version = "0.1.0" +edition = "2021" +description = "Rust utilities for YUV format handling and conversion." +readme = "README.md" +keywords = ["yuv"] +license = "Apache-2.0 OR BSD-3-Clause" +authors = ["Radzivon Bartoshyk"] +documentation = "https://github.com/awxkee/yuvutils-rs" +categories = ["multimedia::images", "multimedia::video"] +homepage = "https://github.com/awxkee/yuvutils-rs" +repository = "https://github.com/awxkee/yuvutils-rs" +exclude = ["*.jpg"] + +[dependencies] diff --git a/LICENSE-BSD.md b/LICENSE-BSD.md new file mode 100644 index 0000000..bf616fd --- /dev/null +++ b/LICENSE-BSD.md @@ -0,0 +1,26 @@ +Copyright (c) Radzivon Bartoshyk. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..86a13a8 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2024 Radzivon Bartoshyk + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..12cf7ac --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# Rust utilities for YUV format handling and conversion. \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..1dd020d --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,42 @@ +mod rgba_to_yuv; +mod yuv_nv12; +mod yuv_nv12_p10; +mod yuv_support; +mod yuv_to_rgba; + +pub use yuv_support::YuvStandardMatrix; +pub use yuv_support::YuvRange; + +pub use yuv_nv12_p10::yuv_nv12_p10_to_bgra_be; +pub use yuv_nv12_p10::yuv_nv16_p10_to_bgra_be; +pub use yuv_nv12_p10::yuv_nv12_p10_to_bgra; +pub use yuv_nv12_p10::yuv_nv16_p10_to_bgra; +pub use yuv_nv12_p10::yuv_nv12_p10_msb_to_bgra; +pub use yuv_nv12_p10::yuv_nv16_p10_msb_to_bgra; + +pub use yuv_nv12::yuv_nv12_to_bgra; +pub use yuv_nv12::yuv_nv21_to_bgra; +pub use yuv_nv12::yuv_nv12_to_rgba; +pub use yuv_nv12::yuv_nv21_to_rgba; +pub use yuv_nv12::yuv_nv12_to_rgb; +pub use yuv_nv12::yuv_nv21_to_rgb; + +pub use yuv_to_rgba::yuv420_to_rgb; +pub use yuv_to_rgba::yuv420_to_rgba; +pub use yuv_to_rgba::yuv420_to_bgra; +pub use yuv_to_rgba::yuv422_to_rgb; +pub use yuv_to_rgba::yuv422_to_rgba; +pub use yuv_to_rgba::yuv422_to_bgra; +pub use yuv_to_rgba::yuv444_to_rgba; +pub use yuv_to_rgba::yuv444_to_bgra; +pub use yuv_to_rgba::yuv444_to_rgb; + +pub use rgba_to_yuv::rgb_to_yuv420; +pub use 
rgba_to_yuv::rgba_to_yuv420;
+pub use rgba_to_yuv::bgra_to_yuv420;
+pub use rgba_to_yuv::rgb_to_yuv422;
+pub use rgba_to_yuv::rgba_to_yuv422;
+pub use rgba_to_yuv::bgra_to_yuv422;
+pub use rgba_to_yuv::rgb_to_yuv444;
+pub use rgba_to_yuv::rgba_to_yuv444;
+pub use rgba_to_yuv::bgra_to_yuv444;
\ No newline at end of file
diff --git a/src/rgba_to_yuv.rs b/src/rgba_to_yuv.rs
new file mode 100644
index 0000000..307b637
--- /dev/null
+++ b/src/rgba_to_yuv.rs
@@ -0,0 +1,812 @@
+#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+#[cfg(target_feature = "neon")]
+use std::arch::aarch64::{
+    uint8x16_t, vcombine_u16, vcombine_u8, vdupq_n_s16, vdupq_n_s32, vget_low_s16,
+    vget_low_u8, vld3q_u8, vld4q_u8, vmaxq_s32, vmlal_high_s16, vmlal_s16,
+    vmovl_high_u8, vmovl_u8, vpaddlq_u8, vqmovn_u16, vqshrun_n_s32,
+    vreinterpretq_s16_u16, vshrn_n_u16, vst1_u8, vst1q_u8,
+};
+
+use crate::yuv_support::{
+    get_forward_transform, get_kr_kb, get_yuv_range, ToIntegerTransform, YuvChromaSample, YuvRange,
+    YuvSourceChannels, YuvStandardMatrix,
+};
+
+/// Shared 8-bit RGB/RGBA/BGRA to planar YUV conversion kernel.
+///
+/// `ORIGIN_CHANNELS` is a [`YuvSourceChannels`] value cast to `u8` (source channel layout) and
+/// `SAMPLING` is a [`YuvChromaSample`] value cast to `u8` (output chroma subsampling).
+///
+/// Luma is written for every pixel. With 4:2:0 and 4:2:2 output one chroma sample is stored
+/// per horizontal pixel pair: the NEON path averages the pair, the scalar path takes the
+/// chroma of the first pixel. With 4:2:0 the chroma row only advances after each odd image
+/// row, so chroma computed from the second row of a pair overwrites that of the first.
+///
+/// # Panics
+///
+/// Panics if a plane or the source buffer is too short for `width`, `height` and the strides.
+/// The sizes are checked up front so that the unchecked NEON loads and stores stay in bounds.
+fn rgbx_to_yuv8<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    let chroma_subsampling: YuvChromaSample = SAMPLING.into();
+    let source_channels: YuvSourceChannels = ORIGIN_CHANNELS.into();
+    let channels = source_channels.get_channels_count();
+    let range = get_yuv_range(8, range);
+    let kr_kb = get_kr_kb(matrix);
+    let max_range_p8 = (2f32.powi(8) - 1f32) as u32;
+    let transform_precise = get_forward_transform(
+        max_range_p8,
+        range.range_y,
+        range.range_uv,
+        kr_kb.kr,
+        kr_kb.kb,
+    );
+    let transform = transform_precise.to_integers(8);
+    let precision_scale = (1 << 8) as f32;
+    let bias_y = ((range.bias_y as f32 + 0.5f32) * precision_scale) as i32;
+    let bias_uv = ((range.bias_uv as f32 + 0.5f32) * precision_scale) as i32;
+
+    let iterator_step = match chroma_subsampling {
+        YuvChromaSample::YUV420 => 2usize,
+        YuvChromaSample::YUV422 => 2usize,
+        YuvChromaSample::YUV444 => 1usize,
+    };
+
+    let width_px = width as usize;
+    let height_px = height as usize;
+    if width_px == 0 || height_px == 0 {
+        // Nothing to convert; this also keeps the `- 1` terms below from underflowing.
+        return;
+    }
+
+    // Check the buffer sizes once, up front: the NEON path writes through raw pointers, so an
+    // undersized plane would otherwise be an out-of-bounds write instead of a panic.
+    let (chroma_width, chroma_rows) = match chroma_subsampling {
+        YuvChromaSample::YUV420 => ((width_px + 1) / 2, (height_px + 1) / 2),
+        YuvChromaSample::YUV422 => ((width_px + 1) / 2, height_px),
+        YuvChromaSample::YUV444 => (width_px, height_px),
+    };
+    assert!(
+        y_plane.len() >= y_stride as usize * (height_px - 1) + width_px,
+        "Y plane is too small for the given width, height and stride"
+    );
+    assert!(
+        u_plane.len() >= u_stride as usize * (chroma_rows - 1) + chroma_width,
+        "U plane is too small for the given width, height and stride"
+    );
+    assert!(
+        v_plane.len() >= v_stride as usize * (chroma_rows - 1) + chroma_width,
+        "V plane is too small for the given width, height and stride"
+    );
+    assert!(
+        rgba.len() >= rgba_stride as usize * (height_px - 1) + width_px * channels,
+        "source image is too small for the given width, height and stride"
+    );
+
+    let mut y_offset = 0usize;
+    let mut u_offset = 0usize;
+    let mut v_offset = 0usize;
+    let mut rgba_offset = 0usize;
+
+    for y in 0..height as usize {
+        let mut cx = 0usize;
+
+        // SAFETY: the assertions above guarantee each row holds `width` luma samples,
+        // `chroma_width` chroma samples and `width * channels` source bytes, and the loop only
+        // processes 16-pixel groups that end before `width`.
+        #[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
+        #[cfg(target_feature = "neon")]
+        unsafe {
+            let y_ptr = y_plane.as_mut_ptr();
+            let u_ptr = u_plane.as_mut_ptr();
+            let v_ptr = v_plane.as_mut_ptr();
+            let rgba_ptr = rgba.as_ptr();
+
+            let y_bias = vdupq_n_s32(bias_y);
+            let uv_bias = vdupq_n_s32(bias_uv);
+            let v_yr = vdupq_n_s16(transform.yr as i16);
+            let v_yg = vdupq_n_s16(transform.yg as i16);
+            let v_yb = vdupq_n_s16(transform.yb as i16);
+            let v_cb_r = vdupq_n_s16(transform.cb_r as i16);
+            let v_cb_g = vdupq_n_s16(transform.cb_g as i16);
+            let v_cb_b = vdupq_n_s16(transform.cb_b as i16);
+            let v_cr_r = vdupq_n_s16(transform.cr_r as i16);
+            let v_cr_g = vdupq_n_s16(transform.cr_g as i16);
+            let v_cr_b = vdupq_n_s16(transform.cr_b as i16);
+            let v_zeros = vdupq_n_s32(0i32);
+
+            while cx + 16 < width as usize {
+                let r_values_u8: uint8x16_t;
+                let g_values_u8: uint8x16_t;
+                let b_values_u8: uint8x16_t;
+
+                match source_channels {
+                    YuvSourceChannels::Rgb => {
+                        let rgb_values = vld3q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.0;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.2;
+                    }
+                    YuvSourceChannels::Rgba => {
+                        let rgb_values = vld4q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.0;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.2;
+                    }
+                    YuvSourceChannels::Bgra => {
+                        let rgb_values = vld4q_u8(rgba_ptr.add(rgba_offset + cx * channels));
+                        r_values_u8 = rgb_values.2;
+                        g_values_u8 = rgb_values.1;
+                        b_values_u8 = rgb_values.0;
+                    }
+                }
+
+                let r_high = vreinterpretq_s16_u16(vmovl_high_u8(r_values_u8));
+                let g_high = vreinterpretq_s16_u16(vmovl_high_u8(g_values_u8));
+                let b_high = vreinterpretq_s16_u16(vmovl_high_u8(b_values_u8));
+
+                let r_h_low = vget_low_s16(r_high);
+                let g_h_low = vget_low_s16(g_high);
+                let b_h_low = vget_low_s16(b_high);
+
+                let mut y_h_high = vmlal_high_s16(y_bias, r_high, v_yr);
+                y_h_high = vmlal_high_s16(y_h_high, g_high, v_yg);
+                y_h_high = vmlal_high_s16(y_h_high, b_high, v_yb);
+                y_h_high = vmaxq_s32(y_h_high, v_zeros);
+
+                let mut y_h_low = vmlal_s16(y_bias, r_h_low, vget_low_s16(v_yr));
+                y_h_low = vmlal_s16(y_h_low, g_h_low, vget_low_s16(v_yg));
+                y_h_low = vmlal_s16(y_h_low, b_h_low, vget_low_s16(v_yb));
+                y_h_low = vmaxq_s32(y_h_low, v_zeros);
+
+                let y_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(y_h_low), vqshrun_n_s32::<8>(y_h_high));
+
+                let mut cb_h_high = vmlal_high_s16(uv_bias, r_high, v_cb_r);
+                cb_h_high = vmlal_high_s16(cb_h_high, g_high, v_cb_g);
+                cb_h_high = vmlal_high_s16(cb_h_high, b_high, v_cb_b);
+
+                let mut cb_h_low = vmlal_s16(uv_bias, r_h_low, vget_low_s16(v_cb_r));
+                cb_h_low = vmlal_s16(cb_h_low, g_h_low, vget_low_s16(v_cb_g));
+                cb_h_low = vmlal_s16(cb_h_low, b_h_low, vget_low_s16(v_cb_b));
+
+                let cb_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(cb_h_low), vqshrun_n_s32::<8>(cb_h_high));
+
+                let mut cr_h_high = vmlal_high_s16(uv_bias, r_high, v_cr_r);
+                cr_h_high = vmlal_high_s16(cr_h_high, g_high, v_cr_g);
+                cr_h_high = vmlal_high_s16(cr_h_high, b_high, v_cr_b);
+
+                let mut cr_h_low = vmlal_s16(uv_bias, r_h_low, vget_low_s16(v_cr_r));
+                cr_h_low = vmlal_s16(cr_h_low, g_h_low, vget_low_s16(v_cr_g));
+                cr_h_low = vmlal_s16(cr_h_low, b_h_low, vget_low_s16(v_cr_b));
+
+                let cr_high =
+                    vcombine_u16(vqshrun_n_s32::<8>(cr_h_low), vqshrun_n_s32::<8>(cr_h_high));
+
+                let r_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(r_values_u8)));
+                let g_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(g_values_u8)));
+                let b_low = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(b_values_u8)));
+
+                let r_l_low = vget_low_s16(r_low);
+                let g_l_low = vget_low_s16(g_low);
+                let b_l_low = vget_low_s16(b_low);
+
+                let mut y_l_high = vmlal_high_s16(y_bias, r_low, v_yr);
+                y_l_high = vmlal_high_s16(y_l_high, g_low, v_yg);
+                y_l_high = vmlal_high_s16(y_l_high, b_low, v_yb);
+                y_l_high = vmaxq_s32(y_l_high, v_zeros);
+
+                let mut y_l_low = vmlal_s16(y_bias, r_l_low, vget_low_s16(v_yr));
+                y_l_low = vmlal_s16(y_l_low, g_l_low, vget_low_s16(v_yg));
+                y_l_low = vmlal_s16(y_l_low, b_l_low, vget_low_s16(v_yb));
+                y_l_low = vmaxq_s32(y_l_low, v_zeros);
+
+                let y_low = vcombine_u16(vqshrun_n_s32::<8>(y_l_low), vqshrun_n_s32::<8>(y_l_high));
+
+                let mut cb_l_high = vmlal_high_s16(uv_bias, r_low, v_cb_r);
+                cb_l_high = vmlal_high_s16(cb_l_high, g_low, v_cb_g);
+                cb_l_high = vmlal_high_s16(cb_l_high, b_low, v_cb_b);
+
+                let mut cb_l_low = vmlal_s16(uv_bias, r_l_low, vget_low_s16(v_cb_r));
+                cb_l_low = vmlal_s16(cb_l_low, g_l_low, vget_low_s16(v_cb_g));
+                cb_l_low = vmlal_s16(cb_l_low, b_l_low, vget_low_s16(v_cb_b));
+
+                let cb_low =
+                    vcombine_u16(vqshrun_n_s32::<8>(cb_l_low), vqshrun_n_s32::<8>(cb_l_high));
+
+                let mut cr_l_high = vmlal_high_s16(uv_bias, r_low, v_cr_r);
+                cr_l_high = vmlal_high_s16(cr_l_high, g_low, v_cr_g);
+                cr_l_high = vmlal_high_s16(cr_l_high, b_low, v_cr_b);
+
+                let mut cr_l_low = vmlal_s16(uv_bias, r_l_low, vget_low_s16(v_cr_r));
+                cr_l_low = vmlal_s16(cr_l_low, g_l_low, vget_low_s16(v_cr_g));
+                cr_l_low = vmlal_s16(cr_l_low, b_l_low, vget_low_s16(v_cr_b));
+
+                let cr_low =
+                    vcombine_u16(vqshrun_n_s32::<8>(cr_l_low), vqshrun_n_s32::<8>(cr_l_high));
+
+                let y = vcombine_u8(vqmovn_u16(y_low), vqmovn_u16(y_high));
+                let cb = vcombine_u8(vqmovn_u16(cb_low), vqmovn_u16(cb_high));
+                let cr = vcombine_u8(vqmovn_u16(cr_low), vqmovn_u16(cr_high));
+                vst1q_u8(y_ptr.add(y_offset + cx), y);
+
+                match chroma_subsampling {
+                    YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                        let cb_s = vshrn_n_u16::<1>(vpaddlq_u8(cb));
+                        let cr_s = vshrn_n_u16::<1>(vpaddlq_u8(cr));
+                        vst1_u8(u_ptr.add(u_offset + cx / 2), cb_s);
+                        // The V plane has its own row offset; its stride may differ from U's.
+                        vst1_u8(v_ptr.add(v_offset + cx / 2), cr_s);
+                    }
+                    YuvChromaSample::YUV444 => {
+                        vst1q_u8(u_ptr.add(u_offset + cx), cb);
+                        vst1q_u8(v_ptr.add(v_offset + cx), cr);
+                    }
+                }
+
+                cx += 16;
+            }
+        }
+
+        for x in (cx..width as usize).step_by(iterator_step) {
+            let px = x * channels;
+            let r = rgba[rgba_offset + px + source_channels.get_r_channel_offset()] as i32;
+            let g = rgba[rgba_offset + px + source_channels.get_g_channel_offset()] as i32;
+            let b = rgba[rgba_offset + px + source_channels.get_b_channel_offset()] as i32;
+            let y_0 = (r * transform.yr + g * transform.yg + b * transform.yb + bias_y) >> 8;
+            let cb = (r * transform.cb_r + g * transform.cb_g + b * transform.cb_b + bias_uv) >> 8;
+            let cr = (r * transform.cr_r + g * transform.cr_g + b * transform.cr_b + bias_uv) >> 8;
+            y_plane[y_offset + x] = y_0 as u8;
+            let u_pos = match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => u_offset + x / 2,
+                YuvChromaSample::YUV444 => u_offset + x,
+            };
+            u_plane[u_pos] = cb as u8;
+            let v_pos = match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => v_offset + x / 2,
+                YuvChromaSample::YUV444 => v_offset + x,
+            };
+            v_plane[v_pos] = cr as u8;
+            match chroma_subsampling {
+                YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => {
+                    if x + 1 < width as usize {
+                        let next_px = (x + 1) * channels;
+                        let r = rgba[rgba_offset + next_px + source_channels.get_r_channel_offset()]
+                            as i32;
+                        let g = rgba[rgba_offset + next_px + source_channels.get_g_channel_offset()]
+                            as i32;
+                        let b = rgba[rgba_offset + next_px + source_channels.get_b_channel_offset()]
+                            as i32;
+                        let y_1 =
+                            (r * transform.yr + g * transform.yg + b * transform.yb + bias_y) >> 8;
+                        y_plane[y_offset + x + 1] = y_1 as u8;
+                    }
+                }
+                _ => {}
+            }
+        }
+
+        y_offset += y_stride as usize;
+        rgba_offset += rgba_stride as usize;
+        match chroma_subsampling {
+            YuvChromaSample::YUV420 => {
+                if y & 1 == 1 {
+                    u_offset += u_stride as usize;
+                    v_offset += v_stride as usize;
+                }
+            }
+            YuvChromaSample::YUV444 | YuvChromaSample::YUV422 => {
+                u_offset += u_stride as usize;
+                v_offset += v_stride as usize;
+            }
+        }
+    }
+}
+
+/// Convert RGB image data to YUV422 planar format.
+///
+/// This function performs RGB to YUV conversion and stores the result in YUV422 planar format,
+/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components.
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `u_plane` - A mutable slice for the U (chrominance) plane: one sample per horizontal pixel pair.
+/// * `u_stride` - The stride (bytes per row) for the U plane.
+/// * `v_plane` - A mutable slice for the V (chrominance) plane: one sample per horizontal pixel pair.
+/// * `v_stride` - The stride (bytes per row) for the V plane.
+/// * `rgb` - The input image data slice: interleaved R, G, B bytes.
+/// * `rgb_stride` - The stride (bytes per row) for the RGB image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// This function panics if the lengths of the planes or the input RGB data are not valid based
+/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
+///
+pub fn rgb_to_yuv422(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgb: &[u8],
+    rgb_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_yuv8::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSample::YUV422 as u8 }>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgb,
+        rgb_stride,
+        width,
+        height,
+        range,
+        matrix,
+    );
+}
+
+/// Convert RGBA image data to YUV422 planar format.
+///
+/// This function performs RGBA to YUV conversion and stores the result in YUV422 planar format,
+/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components.
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `u_plane` - A mutable slice for the U (chrominance) plane: one sample per horizontal pixel pair.
+/// * `u_stride` - The stride (bytes per row) for the U plane.
+/// * `v_plane` - A mutable slice for the V (chrominance) plane: one sample per horizontal pixel pair.
+/// * `v_stride` - The stride (bytes per row) for the V plane.
+/// * `rgba` - The input image data slice: interleaved R, G, B, A bytes (alpha is not read).
+/// * `rgba_stride` - The stride (bytes per row) for the RGBA image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// This function panics if the lengths of the planes or the input RGBA data are not valid based
+/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
+///
+pub fn rgba_to_yuv422(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    rgba: &[u8],
+    rgba_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_yuv8::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSample::YUV422 as u8 }>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        rgba,
+        rgba_stride,
+        width,
+        height,
+        range,
+        matrix,
+    );
+}
+
+/// Convert BGRA image data to YUV422 planar format.
+///
+/// This function performs BGRA to YUV conversion and stores the result in YUV422 planar format,
+/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components.
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `u_plane` - A mutable slice for the U (chrominance) plane: one sample per horizontal pixel pair.
+/// * `u_stride` - The stride (bytes per row) for the U plane.
+/// * `v_plane` - A mutable slice for the V (chrominance) plane: one sample per horizontal pixel pair.
+/// * `v_stride` - The stride (bytes per row) for the V plane.
+/// * `bgra` - The input image data slice: interleaved B, G, R, A bytes (alpha is not read).
+/// * `bgra_stride` - The stride (bytes per row) for the BGRA image data.
+/// * `width` - The width of the image in pixels.
+/// * `height` - The height of the image in pixels.
+/// * `range` - The YUV range (limited or full).
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other).
+///
+/// # Panics
+///
+/// This function panics if the lengths of the planes or the input BGRA data are not valid based
+/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided.
+///
+pub fn bgra_to_yuv422(
+    y_plane: &mut [u8],
+    y_stride: u32,
+    u_plane: &mut [u8],
+    u_stride: u32,
+    v_plane: &mut [u8],
+    v_stride: u32,
+    bgra: &[u8],
+    bgra_stride: u32,
+    width: u32,
+    height: u32,
+    range: YuvRange,
+    matrix: YuvStandardMatrix,
+) {
+    rgbx_to_yuv8::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV422 as u8 }>(
+        y_plane,
+        y_stride,
+        u_plane,
+        u_stride,
+        v_plane,
+        v_stride,
+        bgra,
+        bgra_stride,
+        width,
+        height,
+        range,
+        matrix,
+    );
+}
+
+/// Convert RGB image data to YUV420 planar format.
+///
+/// This function performs RGB to YUV conversion and stores the result in YUV420 planar format,
+/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components.
+///
+/// # Arguments
+///
+/// * `y_plane` - A mutable slice to store the Y (luminance) plane data.
+/// * `y_stride` - The stride (bytes per row) for the Y plane.
+/// * `u_plane` - A mutable slice to store the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A mutable slice to store the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `rgb` - The input RGB image data slice. +/// * `rgb_stride` - The stride (bytes per row) for the RGB image data. +/// * `width` - The width of the image in pixels. +/// * `height` - The height of the image in pixels. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input RGBA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn rgb_to_yuv420( + y_plane: &mut [u8], + y_stride: u32, + u_plane: &mut [u8], + u_stride: u32, + v_plane: &mut [u8], + v_stride: u32, + rgb: &[u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + rgbx_to_yuv8::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSample::YUV420 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert RGBA image data to YUV420 planar format. +/// +/// This function performs RGBA to YUV conversion and stores the result in YUV420 planar format, +/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components. +/// +/// # Arguments +/// +/// * `y_plane` - A mutable slice to store the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A mutable slice to store the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A mutable slice to store the V (chrominance) plane data. 
+/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `rgba` - The input RGBA image data slice. +/// * `rgba_stride` - The stride (bytes per row) for the RGBA image data. +/// * `width` - The width of the image in pixels. +/// * `height` - The height of the image in pixels. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input RGBA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn rgba_to_yuv420( + y_plane: &mut [u8], + y_stride: u32, + u_plane: &mut [u8], + u_stride: u32, + v_plane: &mut [u8], + v_stride: u32, + rgba: &[u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + rgbx_to_yuv8::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSample::YUV420 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert BGRA image data to YUV420 planar format. +/// +/// This function performs BGRA to YUV conversion and stores the result in YUV420 planar format, +/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components. +/// +/// # Arguments +/// +/// * `y_plane` - A mutable slice to store the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A mutable slice to store the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A mutable slice to store the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `bgra` - The input BGRA image data slice. +/// * `bgra_stride` - The stride (bytes per row) for the BGRA image data. 
+/// * `width` - The width of the image in pixels. +/// * `height` - The height of the image in pixels. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn bgra_to_yuv420( + y_plane: &mut [u8], + y_stride: u32, + u_plane: &mut [u8], + u_stride: u32, + v_plane: &mut [u8], + v_stride: u32, + bgra: &[u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + rgbx_to_yuv8::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV420 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert RGB image data to YUV444 planar format. +/// +/// This function performs RGB to YUV conversion and stores the result in YUV444 planar format, +/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components. +/// +/// # Arguments +/// +/// * `y_plane` - A mutable slice to store the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A mutable slice to store the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A mutable slice to store the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `rgb` - The input RGB image data slice. +/// * `rgb_stride` - The stride (bytes per row) for the RGB image data. +/// * `width` - The width of the image in pixels. +/// * `height` - The height of the image in pixels. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). 
+/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input RGB data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn rgb_to_yuv444( + y_plane: &mut [u8], + y_stride: u32, + u_plane: &mut [u8], + u_stride: u32, + v_plane: &mut [u8], + v_stride: u32, + rgb: &[u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + rgbx_to_yuv8::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSample::YUV444 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert RGBA image data to YUV444 planar format. +/// +/// This function performs RGBA to YUV conversion and stores the result in YUV444 planar format, +/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components. +/// +/// # Arguments +/// +/// * `y_plane` - A mutable slice to store the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A mutable slice to store the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A mutable slice to store the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `rgba` - The input RGBA image data slice. +/// * `rgba_stride` - The stride (bytes per row) for the RGBA image data. +/// * `width` - The width of the image in pixels. +/// * `height` - The height of the image in pixels. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input RGBA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn rgba_to_yuv444( + y_plane: &mut [u8], + y_stride: u32, + u_plane: &mut [u8], + u_stride: u32, + v_plane: &mut [u8], + v_stride: u32, + rgba: &[u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + rgbx_to_yuv8::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSample::YUV444 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert BGRA image data to YUV444 planar format. +/// +/// This function performs BGRA to YUV conversion and stores the result in YUV444 planar format, +/// with separate planes for Y (luminance), U (chrominance), and V (chrominance) components. +/// +/// # Arguments +/// +/// * `y_plane` - A mutable slice to store the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A mutable slice to store the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A mutable slice to store the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `bgra` - The input BGRA image data slice. +/// * `bgra_stride` - The stride (bytes per row) for the BGRA image data. +/// * `width` - The width of the image in pixels. +/// * `height` - The height of the image in pixels. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn bgra_to_yuv444( + y_plane: &mut [u8], + y_stride: u32, + u_plane: &mut [u8], + u_stride: u32, + v_plane: &mut [u8], + v_stride: u32, + rgba: &[u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + rgbx_to_yuv8::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV444 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ); +} diff --git a/src/yuv_nv12.rs b/src/yuv_nv12.rs new file mode 100644 index 0000000..7ac834a --- /dev/null +++ b/src/yuv_nv12.rs @@ -0,0 +1,551 @@ +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +#[cfg(target_feature = "neon")] +use std::arch::aarch64::{ + uint8x16x2_t, uint8x16x3_t, uint8x16x4_t, uint8x8_t, uint8x8x2_t, vcombine_u8, vdup_n_u8, + vdupq_n_s16, vdupq_n_u8, vget_high_u8, vget_low_u8, vld1q_u8, vld2_u8, vld2q_u8, vmaxq_s16, + vmovl_u8, vmull_high_u8, vmull_u8, vmulq_s16, vqaddq_s16, vqshrun_n_s16, vreinterpretq_s16_u16, + vst3q_u8, vst4q_u8, vsubq_s16, vsubq_u8, vzip1_u8, vzip2_u8, +}; + +use crate::yuv_support::{ + get_inverse_transform, get_kr_kb, get_yuv_range, YuvChromaSample, YuvNVOrder, YuvRange, + YuvSourceChannels, YuvStandardMatrix, +}; + +fn yuv_nv12_to_rgbx< + const UV_ORDER: u8, + const DESTINATION_CHANNELS: u8, + const YUV_CHROMA_SAMPLING: u8, +>( + y_plane: &[u8], + y_stride: u32, + uv_plane: &[u8], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + let order: YuvNVOrder = UV_ORDER.into(); + let range = get_yuv_range(8, range); + let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); + let chroma_subsampling: YuvChromaSample = YUV_CHROMA_SAMPLING.into(); + let channels = destination_channels.get_channels_count(); + let kr_kb = get_kr_kb(matrix); + let transform = get_inverse_transform(255, range.range_y, range.range_uv, kr_kb.kr, kr_kb.kb); + 
let i_transform = transform.to_integers(6u32); + let cr_coef = i_transform.cr_coef; + let cb_coef = i_transform.cb_coef; + let y_coef = i_transform.y_coef; + let g_coef_1 = i_transform.g_coeff_1; + let g_coef_2 = i_transform.g_coeff_2; + + let bias_y = range.bias_y as i32; + let bias_uv = range.bias_uv as i32; + + let mut y_offset = 0usize; + let mut uv_offset = 0usize; + let mut dst_offset = 0usize; + + for y in 0..height as usize { + let mut x = 0usize; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + #[cfg(target_feature = "neon")] + unsafe { + let y_ptr = y_plane.as_ptr(); + let uv_ptr = uv_plane.as_ptr(); + let bgra_ptr = bgra.as_mut_ptr(); + + let y_corr = vdupq_n_u8(bias_y as u8); + let uv_corr = vdupq_n_s16(bias_uv as i16); + let v_luma_coeff = vdupq_n_u8(y_coef as u8); + let v_luma_coeff_8 = vdup_n_u8(y_coef as u8); + let v_cr_coeff = vdupq_n_s16(cr_coef as i16); + let v_cb_coeff = vdupq_n_s16(cb_coef as i16); + let v_min_values = vdupq_n_s16(0i16); + let v_g_coeff_1 = vdupq_n_s16(-1i16 * (g_coef_1 as i16)); + let v_g_coeff_2 = vdupq_n_s16(-1i16 * (g_coef_2 as i16)); + let v_alpha = vdupq_n_u8(255u8); + while x + 16 < width as usize { + let y_values = vsubq_u8(vld1q_u8(y_ptr.add(y_offset + x)), y_corr); + + let u_high_u8: uint8x8_t; + let v_high_u8: uint8x8_t; + let u_low_u8: uint8x8_t; + let v_low_u8: uint8x8_t; + + match chroma_subsampling { + YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => { + let mut uv_values = vld2_u8(uv_ptr.add(uv_offset + x)); + if order == YuvNVOrder::VU { + uv_values = uint8x8x2_t(uv_values.1, uv_values.0); + } + + u_high_u8 = vzip2_u8(uv_values.0, uv_values.0); + v_high_u8 = vzip2_u8(uv_values.1, uv_values.1); + u_low_u8 = vzip1_u8(uv_values.0, uv_values.0); + v_low_u8 = vzip1_u8(uv_values.1, uv_values.1); + } + YuvChromaSample::YUV444 => { + let mut uv_values = vld2q_u8(uv_ptr.add(uv_offset + x * 2)); + if order == YuvNVOrder::VU { + uv_values = uint8x16x2_t(uv_values.1, uv_values.0); + } + u_high_u8 = 
vget_high_u8(uv_values.0); + v_high_u8 = vget_high_u8(uv_values.1); + u_low_u8 = vget_low_u8(uv_values.0); + v_low_u8 = vget_low_u8(uv_values.1); + } + } + + let u_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_high_u8)), uv_corr); + let v_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_high_u8)), uv_corr); + let y_high = vreinterpretq_s16_u16(vmull_high_u8(y_values, v_luma_coeff)); + + let r_high = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_high, vmulq_s16(v_high, v_cr_coeff)), + v_min_values, + )); + let b_high = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_high, vmulq_s16(u_high, v_cb_coeff)), + v_min_values, + )); + let g_high = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16( + y_high, + vqaddq_s16( + vmulq_s16(v_high, v_g_coeff_1), + vmulq_s16(u_high, v_g_coeff_2), + ), + ), + v_min_values, + )); + + let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr); + let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr); + let y_low = vreinterpretq_s16_u16(vmull_u8(vget_low_u8(y_values), v_luma_coeff_8)); + + let r_low = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_low, vmulq_s16(v_low, v_cr_coeff)), + v_min_values, + )); + let b_low = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_low, vmulq_s16(u_low, v_cb_coeff)), + v_min_values, + )); + let g_low = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16( + y_low, + vqaddq_s16(vmulq_s16(v_low, v_g_coeff_1), vmulq_s16(u_low, v_g_coeff_2)), + ), + v_min_values, + )); + + let r_values = vcombine_u8(r_low, r_high); + let g_values = vcombine_u8(g_low, g_high); + let b_values = vcombine_u8(b_low, b_high); + + let dst_shift = dst_offset + x * channels; + + match destination_channels { + YuvSourceChannels::Rgb => { + let dst_pack: uint8x16x3_t = uint8x16x3_t(r_values, g_values, b_values); + vst3q_u8(bgra_ptr.add(dst_shift), dst_pack); + } + YuvSourceChannels::Rgba => { + let dst_pack: uint8x16x4_t = + uint8x16x4_t(b_values, g_values, r_values, v_alpha); + vst4q_u8(bgra_ptr.add(dst_shift), 
dst_pack); + } + YuvSourceChannels::Bgra => { + let dst_pack: uint8x16x4_t = + uint8x16x4_t(r_values, g_values, b_values, v_alpha); + vst4q_u8(bgra_ptr.add(dst_shift), dst_pack); + } + } + + x += 16; + } + } + + while x < width as usize { + let y_value = (y_plane[y_offset + x] as i32 - bias_y) * y_coef; + let cb_value: i32; + let cr_value: i32; + let cb_pos = uv_offset + x; + let cr_pos = uv_offset + x + 1; + + match order { + YuvNVOrder::UV => { + cb_value = uv_plane[cb_pos] as i32 - bias_uv; + cr_value = uv_plane[cr_pos] as i32 - bias_uv; + } + YuvNVOrder::VU => { + cb_value = uv_plane[cr_pos] as i32 - bias_uv; + cr_value = uv_plane[cb_pos] as i32 - bias_uv; + } + } + + let r = ((y_value + cr_coef * cr_value) >> 6).min(255).max(0); + let b = ((y_value + cb_coef * cb_value) >> 6).min(255).max(0); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6) + .min(255) + .max(0); + + let px = x * channels; + + let dst_shift = dst_offset + px; + + bgra[dst_shift + destination_channels.get_b_channel_offset()] = b as u8; + bgra[dst_shift + destination_channels.get_g_channel_offset()] = g as u8; + bgra[dst_shift + destination_channels.get_r_channel_offset()] = r as u8; + if destination_channels.has_alpha() { + bgra[dst_shift + destination_channels.get_a_channel_offset()] = 255; + } + + if chroma_subsampling == YuvChromaSample::YUV422 + || chroma_subsampling == YuvChromaSample::YUV420 + { + x += 1; + if x + 1 < width as usize { + let y_value = (y_plane[y_offset + x + 1] as i32 - bias_y) * y_coef; + + let r = ((y_value + cr_coef * cr_value) >> 6).min(255).max(0); + let b = ((y_value + cb_coef * cb_value) >> 6).min(255).max(0); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6) + .min(255) + .max(0); + + let next_px = x * channels; + let dst_shift = dst_offset + next_px; + bgra[dst_shift + destination_channels.get_b_channel_offset()] = b as u8; + bgra[dst_shift + destination_channels.get_g_channel_offset()] = g as u8; + bgra[dst_shift + 
destination_channels.get_r_channel_offset()] = r as u8; + if destination_channels.has_alpha() { + bgra[dst_shift + destination_channels.get_a_channel_offset()] = 255; + } + } + } + + x += 1; + } + + match chroma_subsampling { + YuvChromaSample::YUV420 => { + if y & 1 == 1 { + uv_offset += uv_stride as usize; + } + } + YuvChromaSample::YUV444 | YuvChromaSample::YUV422 => { + uv_offset += uv_stride as usize; + } + } + + dst_offset += bgra_stride as usize; + y_offset += y_stride as usize; + } +} + +/// Convert YUV NV12 format to BGRA format. +/// +/// This function takes YUV NV12 data with 8-bit precision, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `uv_plane` - A slice to load the UV (chrominance) plane data. +/// * `uv_stride` - The stride (bytes per row) for the UV plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv12_to_bgra( + y_plane: &[u8], + y_stride: u32, + uv_plane: &[u8], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_to_rgbx::< + { YuvNVOrder::UV as u8 }, + { YuvSourceChannels::Bgra as u8 }, + { YuvChromaSample::YUV420 as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV NV21 format to BGRA format. 
+/// +/// This function takes YUV NV12 data with 8-bit precision, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `vu_plane` - A slice to load the VU (chrominance) plane data. +/// * `vu_stride` - The stride (bytes per row) for the VU plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv21_to_bgra( + y_plane: &[u8], + y_stride: u32, + vu_plane: &[u8], + vu_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_to_rgbx::< + { YuvNVOrder::VU as u8 }, + { YuvSourceChannels::Bgra as u8 }, + { YuvChromaSample::YUV420 as u8 }, + >( + y_plane, + y_stride, + vu_plane, + vu_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV NV12 format to RGBA format. +/// +/// This function takes YUV NV12 data with 8-bit precision, +/// and converts it to RGBA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `uv_plane` - A slice to load the UV (chrominance) plane data. +/// * `uv_stride` - The stride (bytes per row) for the UV plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgba_data` - A mutable slice to store the converted RGBA data. 
+/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv12_to_rgba( + y_plane: &[u8], + y_stride: u32, + uv_plane: &[u8], + uv_stride: u32, + rgba: &mut [u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_to_rgbx::< + { YuvNVOrder::UV as u8 }, + { YuvSourceChannels::Rgba as u8 }, + { YuvChromaSample::YUV420 as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV NV21 format to RGBA format. +/// +/// This function takes YUV NV21 data with 8-bit precision, +/// and converts it to RGBA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `vu_plane` - A slice to load the VU (chrominance) plane data. +/// * `vu_stride` - The stride (bytes per row) for the VU plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgba_data` - A mutable slice to store the converted RGBA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn yuv_nv21_to_rgba( + y_plane: &[u8], + y_stride: u32, + vu_plane: &[u8], + vu_stride: u32, + rgba: &mut [u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_to_rgbx::< + { YuvNVOrder::VU as u8 }, + { YuvSourceChannels::Rgba as u8 }, + { YuvChromaSample::YUV420 as u8 }, + >( + y_plane, + y_stride, + vu_plane, + vu_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV NV12 format to RGB format. +/// +/// This function takes YUV NV12 data with 8-bit precision, +/// and converts it to RGB format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `uv_plane` - A slice to load the UV (chrominance) plane data. +/// * `uv_stride` - The stride (bytes per row) for the UV plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgb_data` - A mutable slice to store the converted RGB data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv12_to_rgb( + y_plane: &[u8], + y_stride: u32, + uv_plane: &[u8], + uv_stride: u32, + rgb: &mut [u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_to_rgbx::< + { YuvNVOrder::UV as u8 }, + { YuvSourceChannels::Rgb as u8 }, + { YuvChromaSample::YUV420 as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV NV21 format to RGB format. +/// +/// This function takes YUV NV21 data with 8-bit precision, +/// and converts it to RGB format with 8-bit per channel precision. 
+/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `vu_plane` - A slice to load the VU (chrominance) plane data. +/// * `vu_stride` - The stride (bytes per row) for the VU plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgb_data` - A mutable slice to store the converted RGB data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv21_to_rgb( + y_plane: &[u8], + y_stride: u32, + vu_plane: &[u8], + vu_stride: u32, + rgb: &mut [u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_to_rgbx::< + { YuvNVOrder::VU as u8 }, + { YuvSourceChannels::Rgb as u8 }, + { YuvChromaSample::YUV420 as u8 }, + >( + y_plane, + y_stride, + vu_plane, + vu_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ) +} + diff --git a/src/yuv_nv12_p10.rs b/src/yuv_nv12_p10.rs new file mode 100644 index 0000000..34ed931 --- /dev/null +++ b/src/yuv_nv12_p10.rs @@ -0,0 +1,628 @@ +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +#[cfg(target_feature = "neon")] +use std::arch::aarch64::{ + int16x4_t, int16x8_t, uint16x4x2_t, uint8x8x4_t, vcombine_s16, vdup_n_s16, vdup_n_u8, + vdupq_n_s16, vget_low_s16, vld1q_u16, vld2_u16, vmaxq_s16, vmlal_s16, vmull_high_s16, + vmull_s16, vreinterpret_s16_u16, vreinterpret_u16_u8, vreinterpret_u8_u16, + vreinterpretq_s16_u16, vqshrun_n_s16, vreinterpretq_u16_u8, vreinterpretq_u8_u16, + vrev16_u8, vrev16q_u8, vshr_n_u16, vshrn_n_s32, vshrq_n_u16, vst4_u8, vsub_s16, vsubq_s16, + vzip1_s16, vzip2_s16, uint8x8x3_t, vst3_u8, +}; +use std::slice; + +use crate::yuv_support::{ + get_inverse_transform, get_kr_kb, get_yuv_range, 
YuvBytesPosition, YuvChromaSample, YuvEndian, + YuvNVOrder, YuvRange, YuvSourceChannels, YuvStandardMatrix, +}; + +fn yuv_nv12_p10_to_bgra_impl< + const DESTINATION_CHANNELS: u8, + const NV_ORDER: u8, + const SAMPLING: u8, + const ENDIANNESS: u8, + const BYTES_POSITION: u8, +>( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); + let channels = destination_channels.get_channels_count(); + let uv_order: YuvNVOrder = NV_ORDER.into(); + let chroma_subsampling: YuvChromaSample = SAMPLING.into(); + let endianness: YuvEndian = ENDIANNESS.into(); + let bytes_position: YuvBytesPosition = BYTES_POSITION.into(); + let range = get_yuv_range(10, range); + let kr_kb = get_kr_kb(matrix); + let max_range_p10 = (2f32.powi(10) - 1f32) as u32; + let transform = get_inverse_transform( + max_range_p10, + range.range_y, + range.range_uv, + kr_kb.kr, + kr_kb.kb, + ); + let i_transform = transform.to_integers(6u32); + let cr_coef = i_transform.cr_coef; + let cb_coef = i_transform.cb_coef; + let y_coef = i_transform.y_coef; + let g_coef_1 = i_transform.g_coeff_1; + let g_coef_2 = i_transform.g_coeff_2; + + let bias_y = range.bias_y as i32; + let bias_uv = range.bias_uv as i32; + + let mut y_offset = 0usize; + let mut uv_offset = 0usize; + let mut dst_offest = 0usize; + + let y_src_ptr = y_plane.as_ptr() as *const u8; + let uv_src_ptr = uv_plane.as_ptr() as *const u8; + + for y in 0..height as usize { + let mut x = 0usize; + + let y_ld_ptr = unsafe { y_src_ptr.offset(y_offset as isize) as *const u16 }; + let y_ld = unsafe { slice::from_raw_parts(y_ld_ptr, width as usize) }; + let uv_ld_ptr = unsafe { uv_src_ptr.offset(uv_offset as isize) as *const u16 }; + let uv_ld = unsafe { slice::from_raw_parts(uv_ld_ptr, width as usize * 2usize) }; + + #[cfg(any(target_arch = "arm", 
target_arch = "aarch64"))] + #[cfg(target_feature = "neon")] + unsafe { + let dst_ptr = bgra.as_mut_ptr(); + + let y_corr = vdupq_n_s16(bias_y as i16); + let uv_corr = vdup_n_s16(bias_uv as i16); + let v_luma_coeff = vdupq_n_s16(y_coef as i16); + let v_luma_coeff_4 = vdup_n_s16(y_coef as i16); + let v_cr_coeff = vdup_n_s16(cr_coef as i16); + let v_cb_coeff = vdup_n_s16(cb_coef as i16); + let v_min_values = vdupq_n_s16(0i16); + let v_g_coeff_1 = vdup_n_s16(-1i16 * (g_coef_1 as i16)); + let v_g_coeff_2 = vdup_n_s16(-1i16 * (g_coef_2 as i16)); + let v_alpha = vdup_n_u8(255u8); + + while x + 8 < width as usize { + let y_values: int16x8_t; + + let u_values_c: int16x4_t; + let v_values_c: int16x4_t; + + match endianness { + YuvEndian::BigEndian => { + let mut y_u_values = vreinterpretq_u16_u8(vrev16q_u8( + vreinterpretq_u8_u16(vld1q_u16(y_ld_ptr.add(x))), + )); + if bytes_position == YuvBytesPosition::MostSignificantBytes { + y_u_values = vshrq_n_u16::<6>(y_u_values); + } + y_values = vsubq_s16(vreinterpretq_s16_u16(y_u_values), y_corr); + + let mut uv_values_u = vld2_u16(uv_ld_ptr.add(x)); + + if uv_order == YuvNVOrder::VU { + uv_values_u = uint16x4x2_t(uv_values_u.1, uv_values_u.0); + } + + let mut u_v = + vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(uv_values_u.0))); + let mut v_v = + vreinterpret_u16_u8(vrev16_u8(vreinterpret_u8_u16(uv_values_u.1))); + if bytes_position == YuvBytesPosition::MostSignificantBytes { + u_v = vshr_n_u16::<6>(u_v); + v_v = vshr_n_u16::<6>(v_v); + } + u_values_c = vsub_s16(vreinterpret_s16_u16(u_v), uv_corr); + v_values_c = vsub_s16(vreinterpret_s16_u16(v_v), uv_corr); + } + YuvEndian::LittleEndian => { + let mut y_vl = vld1q_u16(y_ld_ptr.add(x)); + if bytes_position == YuvBytesPosition::MostSignificantBytes { + y_vl = vshrq_n_u16::<6>(y_vl); + } + y_values = vsubq_s16(vreinterpretq_s16_u16(y_vl), y_corr); + + let mut uv_values_u = vld2_u16(uv_ld_ptr.add(x)); + + if uv_order == YuvNVOrder::VU { + uv_values_u = 
uint16x4x2_t(uv_values_u.1, uv_values_u.0); + } + + let mut u_vl = uv_values_u.0; + let mut v_vl = uv_values_u.1; + if bytes_position == YuvBytesPosition::MostSignificantBytes { + u_vl = vshr_n_u16::<6>(u_vl); + v_vl = vshr_n_u16::<6>(v_vl); + } + u_values_c = vsub_s16(vreinterpret_s16_u16(u_vl), uv_corr); + v_values_c = vsub_s16(vreinterpret_s16_u16(v_vl), uv_corr); + } + } + + let u_high = vzip2_s16(u_values_c, u_values_c); + let v_high = vzip2_s16(v_values_c, v_values_c); + + let y_high = vmull_high_s16(y_values, v_luma_coeff); + + let r_high = vshrn_n_s32::<6>(vmlal_s16(y_high, v_high, v_cr_coeff)); + let b_high = vshrn_n_s32::<6>(vmlal_s16(y_high, u_high, v_cb_coeff)); + let g_high = vshrn_n_s32::<6>(vmlal_s16( + vmlal_s16(y_high, v_high, v_g_coeff_1), + u_high, + v_g_coeff_2, + )); + + let y_low = vmull_s16(vget_low_s16(y_values), v_luma_coeff_4); + let u_low = vzip1_s16(u_values_c, u_values_c); + let v_low = vzip1_s16(v_values_c, v_values_c); + + let r_low = vshrn_n_s32::<6>(vmlal_s16(y_low, v_low, v_cr_coeff)); + let b_low = vshrn_n_s32::<6>(vmlal_s16(y_low, u_low, v_cb_coeff)); + let g_low = vshrn_n_s32::<6>(vmlal_s16( + vmlal_s16(y_low, v_low, v_g_coeff_1), + u_low, + v_g_coeff_2, + )); + + let r_values = vqshrun_n_s16::<2>(vmaxq_s16( + vcombine_s16(r_low, r_high), + v_min_values, + )); + let g_values = vqshrun_n_s16::<2>(vmaxq_s16( + vcombine_s16(g_low, g_high), + v_min_values, + )); + let b_values = vqshrun_n_s16::<2>(vmaxq_s16( + vcombine_s16(b_low, b_high), + v_min_values, + )); + + match destination_channels { + YuvSourceChannels::Rgb => { + let dst_pack: uint8x8x3_t = uint8x8x3_t(r_values, g_values, b_values); + vst3_u8(dst_ptr.add(dst_offest + x * channels), dst_pack); + } + YuvSourceChannels::Rgba => { + let dst_pack: uint8x8x4_t = uint8x8x4_t(r_values, g_values, b_values, v_alpha); + vst4_u8(dst_ptr.add(dst_offest + x * channels), dst_pack); + } + YuvSourceChannels::Bgra => { + let dst_pack: uint8x8x4_t = uint8x8x4_t(b_values, g_values, r_values, 
v_alpha); + vst4_u8(dst_ptr.add(dst_offest + x * channels), dst_pack); + } + } + + x += 8; + } + } + + while x < width as usize { + let y_value: i32; + let cb_value: i32; + let cr_value: i32; + match endianness { + YuvEndian::BigEndian => { + let mut y_vl = u16::from_be(y_ld[x]) as i32; + let mut cb_vl = u16::from_be(uv_ld[x]) as i32; + let mut cr_vl = u16::from_be(uv_ld[x + 1]) as i32; + if bytes_position == YuvBytesPosition::MostSignificantBytes { + y_vl = y_vl >> 6; + cb_vl = cb_vl >> 6; + cr_vl = cr_vl >> 6; + } + y_value = (y_vl - bias_y) * y_coef; + + match uv_order { + YuvNVOrder::UV => { + cb_value = cb_vl - bias_uv; + cr_value = cr_vl - bias_uv; + } + YuvNVOrder::VU => { + cr_value = cb_vl - bias_uv; + cb_value = cr_vl - bias_uv; + } + } + } + YuvEndian::LittleEndian => { + let mut y_vl = u16::from_le(y_ld[x]) as i32; + let mut cb_vl = u16::from_le(uv_ld[x]) as i32; + let mut cr_vl = u16::from_le(uv_ld[x + 1]) as i32; + if bytes_position == YuvBytesPosition::MostSignificantBytes { + y_vl = y_vl >> 6; + cb_vl = cb_vl >> 6; + cr_vl = cr_vl >> 6; + } + y_value = (y_vl - bias_y) * y_coef; + match uv_order { + YuvNVOrder::UV => { + cb_value = cb_vl - bias_uv; + cr_value = cr_vl - bias_uv; + } + YuvNVOrder::VU => { + cr_value = cb_vl - bias_uv; + cb_value = cr_vl - bias_uv; + } + } + } + } + + // shift right 8 due to we want to make it 8 bit instead of 10 + + let r_u16 = (y_value + cr_coef * cr_value) >> 8; + let b_u16 = (y_value + cb_coef * cb_value) >> 8; + let g_u16 = (y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 8; + + let r = r_u16.min(255).max(0); + let b = b_u16.min(255).max(0); + let g = g_u16.min(255).max(0); + + let px = x * channels; + + let rgb_offset = dst_offest + px; + + bgra[rgb_offset + destination_channels.get_b_channel_offset()] = b as u8; + bgra[rgb_offset + destination_channels.get_g_channel_offset()] = g as u8; + bgra[rgb_offset + destination_channels.get_r_channel_offset()] = r as u8; + if destination_channels.has_alpha() { + 
bgra[rgb_offset + destination_channels.get_a_channel_offset()] = 255; + } + + x += 1; + + if x + 1 < width as usize { + let y_value: i32; + match endianness { + YuvEndian::BigEndian => { + let mut y_vl = u16::from_be(y_ld[x]) as i32; + if bytes_position == YuvBytesPosition::MostSignificantBytes { + y_vl = y_vl >> 6; + } + y_value = (y_vl - bias_y) * y_coef; + } + YuvEndian::LittleEndian => { + let mut y_vl = u16::from_le(y_ld[x]) as i32; + if bytes_position == YuvBytesPosition::MostSignificantBytes { + y_vl = y_vl >> 6; + } + y_value = (y_vl - bias_y) * y_coef; + } + } + + let r_u16 = (y_value + cr_coef * cr_value) >> 8; + let b_u16 = (y_value + cb_coef * cb_value) >> 8; + let g_u16 = (y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 8; + + let r = r_u16.min(255).max(0); + let b = b_u16.min(255).max(0); + let g = g_u16.min(255).max(0); + + let px = x * channels; + let rgb_offset = dst_offest + px; + bgra[rgb_offset + destination_channels.get_b_channel_offset()] = b as u8; + bgra[rgb_offset + destination_channels.get_g_channel_offset()] = g as u8; + bgra[rgb_offset + destination_channels.get_r_channel_offset()] = r as u8; + if destination_channels.has_alpha() { + bgra[rgb_offset + destination_channels.get_a_channel_offset()] = 255; + } + } + + x += 1; + } + + match chroma_subsampling { + YuvChromaSample::YUV420 => { + if y & 1 == 1 { + uv_offset += uv_stride as usize; + } + } + YuvChromaSample::YUV422 | YuvChromaSample::YUV444 => { + uv_offset += uv_stride as usize; + } + } + + dst_offest += bgra_stride as usize; + y_offset += y_stride as usize; + } +} + +/// Convert YUV NV12 format with P010 pixel format to BGRA format (Little-Endian). +/// +/// This function takes YUV NV16 data with 10-bit precision stored in Big-Endian. +/// and converts it to BGRA format with big-endian byte order. +/// +/// # Arguments +/// +/// * `yuv_data` - A slice containing YUV NV12 data with P010 pixel format (Little-Endian). +/// * `width` - The width of the YUV image. 
+/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv12_p10_to_bgra( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_p10_to_bgra_impl::< + { YuvSourceChannels::Bgra as u8 }, + { YuvNVOrder::UV as u8 }, + { YuvChromaSample::YUV420 as u8 }, + { YuvEndian::LittleEndian as u8 }, + { YuvBytesPosition::LeastSignificantBytes as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert YUV NV16 format with P010 pixel format to BGRA format (Little-Endian). +/// +/// This function takes YUV NV16 data with 10-bit precision stored in Big-Endian. +/// and converts it to BGRA format with big-endian byte order. +/// +/// # Arguments +/// +/// * `yuv_data` - A slice containing YUV NV16 data with P010 pixel format (Little-Endian). +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn yuv_nv16_p10_to_bgra( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_p10_to_bgra_impl::< + { YuvSourceChannels::Bgra as u8 }, + { YuvNVOrder::UV as u8 }, + { YuvChromaSample::YUV422 as u8 }, + { YuvEndian::LittleEndian as u8 }, + { YuvBytesPosition::LeastSignificantBytes as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert YUV NV12 format with P010 pixel format to BGRA format (Big-Endian). +/// +/// This function takes YUV NV16 data with 10-bit precision stored in Big-Endian. +/// and converts it to BGRA format with big-endian byte order. +/// +/// # Arguments +/// +/// * `yuv_data` - A slice containing YUV NV12 data with P010 pixel format (Big-Endian). +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv12_p10_to_bgra_be( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_p10_to_bgra_impl::< + { YuvSourceChannels::Bgra as u8 }, + { YuvNVOrder::UV as u8 }, + { YuvChromaSample::YUV420 as u8 }, + { YuvEndian::BigEndian as u8 }, + { YuvBytesPosition::LeastSignificantBytes as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert YUV NV16 format with P010 pixel format to BGRA format (Big-Endian). 
+/// +/// This function takes YUV NV16 data with 10-bit precision stored in Big-Endian. +/// and converts it to BGRA format with big-endian byte order. +/// +/// # Arguments +/// +/// * `yuv_data` - A slice containing YUV NV16 data with P010 pixel format (Big-Endian). +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv_nv16_p10_to_bgra_be( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_p10_to_bgra_impl::< + { YuvSourceChannels::Bgra as u8 }, + { YuvNVOrder::UV as u8 }, + { YuvChromaSample::YUV422 as u8 }, + { YuvEndian::BigEndian as u8 }, + { YuvBytesPosition::LeastSignificantBytes as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert YUV NV12 format with P010 pixel format (MSB) to BGRA format. +/// +/// This function takes YUV NV16 data with 10-bit precision and MSB ordering, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `yuv_data` - A slice containing YUV NV12 data with P010 pixel format. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn yuv_nv12_p10_msb_to_bgra( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_p10_to_bgra_impl::< + { YuvSourceChannels::Bgra as u8 }, + { YuvNVOrder::UV as u8 }, + { YuvChromaSample::YUV420 as u8 }, + { YuvEndian::LittleEndian as u8 }, + { YuvBytesPosition::MostSignificantBytes as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} + +/// Convert YUV NV16 format with P010 pixel format (MSB) to BGRA format. +/// +/// This function takes YUV NV16 data with 10-bit precision and MSB ordering, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `yuv_data` - A slice containing YUV NV16 data with P010 pixel format. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn yuv_nv16_p10_msb_to_bgra( + y_plane: &[u16], + y_stride: u32, + uv_plane: &[u16], + uv_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_nv12_p10_to_bgra_impl::< + { YuvSourceChannels::Bgra as u8 }, + { YuvNVOrder::UV as u8 }, + { YuvChromaSample::YUV422 as u8 }, + { YuvEndian::LittleEndian as u8 }, + { YuvBytesPosition::MostSignificantBytes as u8 }, + >( + y_plane, + y_stride, + uv_plane, + uv_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ); +} \ No newline at end of file diff --git a/src/yuv_support.rs b/src/yuv_support.rs new file mode 100644 index 0000000..7bd59e9 --- /dev/null +++ b/src/yuv_support.rs @@ -0,0 +1,363 @@ +#[derive(Copy, Clone)] +pub struct CbCrInverseTransform { + pub y_coef: T, + pub cr_coef: T, + pub cb_coef: T, + pub g_coeff_1: T, + pub g_coeff_2: T, +} + +impl CbCrInverseTransform { + pub fn new( + y_coef: T, + cr_coef: T, + cb_coef: T, + g_coeff_1: T, + g_coeff_2: T, + ) -> CbCrInverseTransform { + return CbCrInverseTransform { + y_coef, + cr_coef, + cb_coef, + g_coeff_1, + g_coeff_2, + }; + } +} + +impl CbCrInverseTransform { + pub fn to_integers(&self, precision: u32) -> CbCrInverseTransform { + let precision_scale: i32 = 1i32 << (precision as i32); + let cr_coef = (self.cr_coef * precision_scale as f32).round() as i32; + let cb_coef = (self.cb_coef * precision_scale as f32).round() as i32; + let y_coef = (self.y_coef * precision_scale as f32).round() as i32; + let g_coef_1 = (self.g_coeff_1 * precision_scale as f32).round() as i32; + let g_coef_2 = (self.g_coeff_2 * precision_scale as f32).round() as i32; + CbCrInverseTransform:: { + y_coef, + cr_coef, + cb_coef, + g_coeff_1: g_coef_1, + g_coeff_2: g_coef_2, + } + } +} + +pub fn get_inverse_transform( + range_bgra: u32, + range_y: u32, + range_uv: u32, + kr: f32, + kb: f32, +) -> CbCrInverseTransform { + let range_uv = range_bgra as f32 / range_uv as f32; + let 
/// Coefficients of an RGB -> YCbCr conversion, one row per output component.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct CbCrForwardTransform<T> {
    /// Weight of R in Y.
    pub yr: T,
    /// Weight of G in Y.
    pub yg: T,
    /// Weight of B in Y.
    pub yb: T,
    /// Weight of R in Cb.
    pub cb_r: T,
    /// Weight of G in Cb.
    pub cb_g: T,
    /// Weight of B in Cb.
    pub cb_b: T,
    /// Weight of R in Cr.
    pub cr_r: T,
    /// Weight of G in Cr.
    pub cr_g: T,
    /// Weight of B in Cr.
    pub cr_b: T,
}

/// Conversion of a floating-point forward transform to fixed point.
pub trait ToIntegerTransform {
    /// Returns the transform scaled by `2^precision` and rounded to the nearest integers.
    fn to_integers(&self, precision: u32) -> CbCrForwardTransform<i32>;
}

impl ToIntegerTransform for CbCrForwardTransform<f32> {
    fn to_integers(&self, precision: u32) -> CbCrForwardTransform<i32> {
        let scale = (1 << precision) as f32;
        let to_fixed = |coef: f32| (coef * scale).round() as i32;
        CbCrForwardTransform::<i32> {
            yr: to_fixed(self.yr),
            yg: to_fixed(self.yg),
            yb: to_fixed(self.yb),
            cb_r: to_fixed(self.cb_r),
            cb_g: to_fixed(self.cb_g),
            cb_b: to_fixed(self.cb_b),
            cr_r: to_fixed(self.cr_r),
            cr_g: to_fixed(self.cr_g),
            cr_b: to_fixed(self.cr_b),
        }
    }
}

/// Derives the floating-point RGB -> YCbCr coefficients for the luma weights `kr` / `kb`.
///
/// * `range_bgra` - Span of the RGB input values (e.g. 255 for 8 bit).
/// * `range_y` - Span of the luma code values.
/// * `range_uv` - Span of the chroma code values.
///
/// # Panics
///
/// Panics if `1.0 - kr - kb` (the green weight) is exactly zero.
pub fn get_forward_transform(
    range_bgra: u32,
    range_y: u32,
    range_uv: u32,
    kr: f32,
    kb: f32,
) -> CbCrForwardTransform<f32> {
    let kg = 1.0f32 - kr - kb;
    if kg == 0f32 {
        panic!("1.0 - kr - kb must not be 0");
    }

    let yr = kr * range_y as f32 / range_bgra as f32;
    let yg = kg * range_y as f32 / range_bgra as f32;
    let yb = kb * range_y as f32 / range_bgra as f32;

    let cb_r = -0.5f32 * kr / (1f32 - kb) * range_uv as f32 / range_bgra as f32;
    let cb_g = -0.5f32 * kg / (1f32 - kb) * range_uv as f32 / range_bgra as f32;
    let cb_b = 0.5f32 * range_uv as f32 / range_bgra as f32;

    let cr_r = 0.5f32 * range_uv as f32 / range_bgra as f32;
    let cr_g = -0.5f32 * kg / (1f32 - kr) * range_uv as f32 / range_bgra as f32;
    let cr_b = -0.5f32 * kb / (1f32 - kr) * range_uv as f32 / range_bgra as f32;
    CbCrForwardTransform {
        yr,
        yg,
        yb,
        cb_r,
        cb_g,
        cb_b,
        cr_r,
        cr_g,
        cr_b,
    }
}
/// Quantisation range of the YUV code values.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub enum YuvRange {
    /// Limited ("TV") range, e.g. 16..=235 luma for 8-bit content.
    TV,
    /// Full range, every code value is used.
    Full,
}

/// Biases and spans of the luma and chroma code values for one bit depth and range.
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub struct YuvChromaRange {
    /// Code value of black that is subtracted from luma.
    pub bias_y: u32,
    /// Code value of neutral chroma that is subtracted from Cb/Cr.
    pub bias_uv: u32,
    /// Span of the luma code values.
    pub range_y: u32,
    /// Span of the chroma code values.
    pub range_uv: u32,
    /// The range these values were derived for.
    pub range: YuvRange,
}

/// Returns the biases and spans for `depth`-bit samples in the given `range`.
///
/// # Panics
///
/// Panics for `YuvRange::TV` when `depth < 8`; the limited-range constants are defined
/// relative to 8-bit video. `depth` is expected to be a real sample depth (below 32).
pub fn get_yuv_range(depth: u32, range: YuvRange) -> YuvChromaRange {
    match range {
        YuvRange::TV => {
            assert!(depth >= 8, "TV range requires a bit depth of at least 8");
            // The 8-bit constants are scaled up to the requested depth.
            let shift = depth - 8;
            YuvChromaRange {
                bias_y: 16 << shift,
                bias_uv: 1 << (depth - 1),
                range_y: 219 << shift,
                range_uv: 224 << shift,
                range,
            }
        }
        YuvRange::Full => {
            // Largest code value representable with `depth` bits.
            let max_value = (1u32 << depth) - 1;
            YuvChromaRange {
                bias_y: 0,
                bias_uv: 1 << (depth - 1),
                range_uv: max_value,
                range_y: max_value,
                range,
            }
        }
    }
}

/// Colour matrices supported by the converters.
#[repr(C)]
#[derive(Debug, Copy, Clone, PartialOrd, PartialEq)]
pub enum YuvStandardMatrix {
    Bt601,
    Bt709,
    Bt2020,
    Smpte240,
}

/// Luma weights of the red (`kr`) and blue (`kb`) primaries for a colour matrix.
#[derive(Debug, Copy, Clone)]
pub struct YuvBias {
    pub kr: f32,
    pub kb: f32,
}

/// Returns the `kr` / `kb` luma weights of `matrix`.
pub fn get_kr_kb(matrix: YuvStandardMatrix) -> YuvBias {
    match matrix {
        YuvStandardMatrix::Bt601 => YuvBias {
            kr: 0.299f32,
            kb: 0.114f32,
        },
        YuvStandardMatrix::Bt709 => YuvBias {
            kr: 0.2126f32,
            kb: 0.0722f32,
        },
        YuvStandardMatrix::Bt2020 => YuvBias {
            kr: 0.2627f32,
            kb: 0.0593f32,
        },
        // SMPTE 240M: Y = 0.212 R + 0.701 G + 0.087 B.
        YuvStandardMatrix::Smpte240 => YuvBias {
            kr: 0.212f32,
            kb: 0.087f32,
        },
    }
}

/// Order of the two chroma samples inside an interleaved (NV-style) chroma plane.
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum YuvNVOrder {
    UV = 0,
    VU = 1,
}

impl From<u8> for YuvNVOrder {
    /// Maps a discriminant back to the enum; used to decode const generic parameters.
    ///
    /// Panics on any value other than 0 or 1.
    #[inline(always)]
    fn from(value: u8) -> Self {
        match value {
            0 => YuvNVOrder::UV,
            1 => YuvNVOrder::VU,
            _ => {
                panic!("Unknown value")
            }
        }
    }
}
/// Chroma subsampling of a YUV image.
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum YuvChromaSample {
    YUV420 = 0,
    YUV422 = 1,
    YUV444 = 2,
}

impl From<u8> for YuvChromaSample {
    /// Maps a discriminant back to the enum; panics on an unknown value.
    #[inline(always)]
    fn from(value: u8) -> Self {
        match value {
            0 => YuvChromaSample::YUV420,
            1 => YuvChromaSample::YUV422,
            2 => YuvChromaSample::YUV444,
            _ => {
                panic!("Unknown value")
            }
        }
    }
}

/// Byte order of 16-bit samples.
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum YuvEndian {
    BigEndian = 0,
    LittleEndian = 1,
}

impl From<u8> for YuvEndian {
    /// Maps a discriminant back to the enum; panics on an unknown value.
    #[inline(always)]
    fn from(value: u8) -> Self {
        match value {
            0 => YuvEndian::BigEndian,
            1 => YuvEndian::LittleEndian,
            _ => {
                panic!("Unknown value")
            }
        }
    }
}

/// Position of the significant bits inside a 16-bit sample word.
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum YuvBytesPosition {
    MostSignificantBytes = 0,
    LeastSignificantBytes = 1,
}

impl From<u8> for YuvBytesPosition {
    /// Maps a discriminant back to the enum; panics on an unknown value.
    #[inline(always)]
    fn from(value: u8) -> Self {
        match value {
            0 => YuvBytesPosition::MostSignificantBytes,
            1 => YuvBytesPosition::LeastSignificantBytes,
            _ => {
                panic!("Unknown value")
            }
        }
    }
}

/// Channel layout of an interleaved RGB-family buffer.
#[repr(u8)]
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum YuvSourceChannels {
    Rgb = 0,
    Rgba = 1,
    Bgra = 2,
}

impl From<u8> for YuvSourceChannels {
    /// Maps a discriminant back to the enum; panics on an unknown value.
    #[inline(always)]
    fn from(value: u8) -> Self {
        match value {
            0 => YuvSourceChannels::Rgb,
            1 => YuvSourceChannels::Rgba,
            2 => YuvSourceChannels::Bgra,
            _ => {
                panic!("Unknown value")
            }
        }
    }
}

impl YuvSourceChannels {
    /// Number of bytes per pixel.
    #[inline(always)]
    pub fn get_channels_count(&self) -> usize {
        match self {
            YuvSourceChannels::Rgb => 3,
            YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => 4,
        }
    }

    /// Whether the layout carries an alpha channel.
    #[inline(always)]
    pub fn has_alpha(&self) -> bool {
        match self {
            YuvSourceChannels::Rgb => false,
            YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => true,
        }
    }

    /// Offset of the red byte inside a pixel.
    #[inline(always)]
    pub fn get_r_channel_offset(&self) -> usize {
        match self {
            YuvSourceChannels::Rgb | YuvSourceChannels::Rgba => 0,
            YuvSourceChannels::Bgra => 2,
        }
    }

    /// Offset of the green byte inside a pixel.
    #[inline(always)]
    pub fn get_g_channel_offset(&self) -> usize {
        match self {
            YuvSourceChannels::Rgb | YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => 1,
        }
    }

    /// Offset of the blue byte inside a pixel.
    #[inline(always)]
    pub fn get_b_channel_offset(&self) -> usize {
        match self {
            YuvSourceChannels::Rgb | YuvSourceChannels::Rgba => 2,
            YuvSourceChannels::Bgra => 0,
        }
    }

    /// Offset of the alpha byte inside a pixel.
    ///
    /// Returns 0 for `Rgb`, which has no alpha channel; check [`Self::has_alpha`] first.
    #[inline(always)]
    pub fn get_a_channel_offset(&self) -> usize {
        match self {
            YuvSourceChannels::Rgb => 0,
            YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => 3,
        }
    }
}
YuvSourceChannels::Bgra => 2, + } + } + + #[inline(always)] + pub fn get_g_channel_offset(&self) -> usize { + match self { + YuvSourceChannels::Rgb => 1, + YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => 1, + } + } + + #[inline(always)] + pub fn get_b_channel_offset(&self) -> usize { + match self { + YuvSourceChannels::Rgb => 2, + YuvSourceChannels::Rgba => 2, + YuvSourceChannels::Bgra => 0, + } + } + #[inline(always)] + pub fn get_a_channel_offset(&self) -> usize { + match self { + YuvSourceChannels::Rgb => 0, + YuvSourceChannels::Rgba | YuvSourceChannels::Bgra => 3, + } + } +} diff --git a/src/yuv_to_rgba.rs b/src/yuv_to_rgba.rs new file mode 100644 index 0000000..c8950ae --- /dev/null +++ b/src/yuv_to_rgba.rs @@ -0,0 +1,751 @@ +#[cfg(any(target_arch = "arm", target_arch = "aarch64"))] +#[cfg(target_feature = "neon")] +use std::arch::aarch64::{ + uint8x16x3_t, uint8x16x4_t, uint8x8_t, vcombine_u8, vdup_n_u8, vdupq_n_s16, vdupq_n_u8, + vget_high_u8, vget_low_u8, vld1_u8, vld1q_u8, vmaxq_s16, vmovl_u8, vmull_high_u8, vmull_u8, + vmulq_s16, vqaddq_s16, vqshrun_n_s16, vreinterpretq_s16_u16, vst3q_u8, vst4q_u8, vsubq_s16, + vsubq_u8, vzip1_u8, vzip2_u8, +}; + +use crate::yuv_support::{ + get_inverse_transform, get_kr_kb, get_yuv_range, YuvChromaSample, YuvRange, YuvSourceChannels, + YuvStandardMatrix, +}; + +fn yuv_to_rgbx( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgba: &mut [u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + let chroma_subsampling: YuvChromaSample = SAMPLING.into(); + let destination_channels: YuvSourceChannels = DESTINATION_CHANNELS.into(); + let channels = destination_channels.get_channels_count(); + let range = get_yuv_range(8, range); + let kr_kb = get_kr_kb(matrix); + let transform = get_inverse_transform(255, range.range_y, range.range_uv, kr_kb.kr, kr_kb.kb); + let precision_scale: i32 = 1i32 << 6i32; + let cr_coef = 
(transform.cr_coef * precision_scale as f32).round() as i32; + let cb_coef = (transform.cb_coef * precision_scale as f32).round() as i32; + let y_coef = (transform.y_coef * precision_scale as f32).round() as i32; + let g_coef_1 = (transform.g_coeff_1 * precision_scale as f32).round() as i32; + let g_coef_2 = (transform.g_coeff_2 * precision_scale as f32).round() as i32; + + let bias_y = range.bias_y as i32; + let bias_uv = range.bias_uv as i32; + + let mut y_offset = 0usize; + let mut u_offset = 0usize; + let mut v_offset = 0usize; + let mut rgba_offset = 0usize; + + let iterator_step = match chroma_subsampling { + YuvChromaSample::YUV420 => 2usize, + YuvChromaSample::YUV422 => 2usize, + YuvChromaSample::YUV444 => 1usize, + }; + + for y in 0..height as usize { + let mut cx = 0usize; + + let mut uv_x = 0usize; + + #[cfg(any(target_arch = "arm", target_arch = "aarch64"))] + #[cfg(target_feature = "neon")] + unsafe { + let y_ptr = y_plane.as_ptr(); + let u_ptr = u_plane.as_ptr(); + let v_ptr = v_plane.as_ptr(); + let rgba_ptr = rgba.as_mut_ptr(); + + let y_corr = vdupq_n_u8(bias_y as u8); + let uv_corr = vdupq_n_s16(bias_uv as i16); + let v_luma_coeff = vdupq_n_u8(y_coef as u8); + let v_luma_coeff_8 = vdup_n_u8(y_coef as u8); + let v_cr_coeff = vdupq_n_s16(cr_coef as i16); + let v_cb_coeff = vdupq_n_s16(cb_coef as i16); + let v_min_values = vdupq_n_s16(0i16); + let v_g_coeff_1 = vdupq_n_s16(-1i16 * g_coef_1 as i16); + let v_g_coeff_2 = vdupq_n_s16(-1i16 * g_coef_2 as i16); + let v_alpha = vdupq_n_u8(255u8); + + while cx + 16 < width as usize { + let y_values = vsubq_u8(vld1q_u8(y_ptr.add(y_offset + cx)), y_corr); + + let u_high_u8: uint8x8_t; + let v_high_u8: uint8x8_t; + let u_low_u8: uint8x8_t; + let v_low_u8: uint8x8_t; + + match chroma_subsampling { + YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => { + let u_values = vld1_u8(u_ptr.add(u_offset + uv_x)); + let v_values = vld1_u8(v_ptr.add(v_offset + uv_x)); + + u_high_u8 = vzip2_u8(u_values, u_values); + 
v_high_u8 = vzip2_u8(v_values, v_values); + u_low_u8 = vzip1_u8(u_values, u_values); + v_low_u8 = vzip1_u8(v_values, v_values); + } + YuvChromaSample::YUV444 => { + let u_values = vld1q_u8(u_ptr.add(u_offset + uv_x)); + let v_values = vld1q_u8(v_ptr.add(v_offset + uv_x)); + + u_high_u8 = vget_high_u8(u_values); + v_high_u8 = vget_high_u8(v_values); + u_low_u8 = vget_low_u8(u_values); + v_low_u8 = vget_low_u8(v_values); + } + } + + let u_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_high_u8)), uv_corr); + let v_high = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_high_u8)), uv_corr); + let y_high = vreinterpretq_s16_u16(vmull_high_u8(y_values, v_luma_coeff)); + + let r_high = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_high, vmulq_s16(v_high, v_cr_coeff)), + v_min_values, + )); + let b_high = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_high, vmulq_s16(u_high, v_cb_coeff)), + v_min_values, + )); + let g_high = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16( + y_high, + vqaddq_s16( + vmulq_s16(v_high, v_g_coeff_1), + vmulq_s16(u_high, v_g_coeff_2), + ), + ), + v_min_values, + )); + + let u_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(u_low_u8)), uv_corr); + let v_low = vsubq_s16(vreinterpretq_s16_u16(vmovl_u8(v_low_u8)), uv_corr); + let y_low = vreinterpretq_s16_u16(vmull_u8(vget_low_u8(y_values), v_luma_coeff_8)); + + let r_low = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_low, vmulq_s16(v_low, v_cr_coeff)), + v_min_values, + )); + let b_low = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16(y_low, vmulq_s16(u_low, v_cb_coeff)), + v_min_values, + )); + let g_low = vqshrun_n_s16::<6>(vmaxq_s16( + vqaddq_s16( + y_low, + vqaddq_s16(vmulq_s16(v_low, v_g_coeff_1), vmulq_s16(u_low, v_g_coeff_2)), + ), + v_min_values, + )); + + let r_values = vcombine_u8(r_low, r_high); + let g_values = vcombine_u8(g_low, g_high); + let b_values = vcombine_u8(b_low, b_high); + + let dst_shift = rgba_offset + cx * channels; + + match destination_channels { + YuvSourceChannels::Rgb => { + let 
dst_pack: uint8x16x3_t = uint8x16x3_t(r_values, g_values, b_values); + vst3q_u8(rgba_ptr.add(dst_shift), dst_pack); + } + YuvSourceChannels::Rgba => { + let dst_pack: uint8x16x4_t = + uint8x16x4_t(b_values, g_values, r_values, v_alpha); + vst4q_u8(rgba_ptr.add(dst_shift), dst_pack); + } + YuvSourceChannels::Bgra => { + let dst_pack: uint8x16x4_t = + uint8x16x4_t(r_values, g_values, b_values, v_alpha); + vst4q_u8(rgba_ptr.add(dst_shift), dst_pack); + } + } + + cx += 16; + + match chroma_subsampling { + YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => { + uv_x += 8; + } + YuvChromaSample::YUV444 => { + uv_x += 16; + } + } + } + } + + for x in (cx..width as usize).step_by(iterator_step) { + let y_value = (y_plane[y_offset + x] as i32 - bias_y) * y_coef; + + let u_pos = match chroma_subsampling { + YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => u_offset + x / 2, + YuvChromaSample::YUV444 => u_offset + x, + }; + + let cb_value = u_plane[u_pos] as i32 - bias_uv; + + let v_pos = match chroma_subsampling { + YuvChromaSample::YUV420 | YuvChromaSample::YUV422 => v_offset + x / 2, + YuvChromaSample::YUV444 => v_offset + x, + }; + + let cr_value = v_plane[v_pos] as i32 - bias_uv; + + let r = ((y_value + cr_coef * cr_value) >> 6).min(255).max(0); + let b = ((y_value + cb_coef * cb_value) >> 6).min(255).max(0); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6) + .min(255) + .max(0); + + let px = x * channels; + + let rgba_shift = rgba_offset + px; + + rgba[rgba_shift + destination_channels.get_r_channel_offset()] = r as u8; + rgba[rgba_shift + destination_channels.get_g_channel_offset()] = g as u8; + rgba[rgba_shift + destination_channels.get_b_channel_offset()] = b as u8; + if destination_channels.has_alpha() { + rgba[rgba_shift + destination_channels.get_a_channel_offset()] = 255; + } + + if chroma_subsampling == YuvChromaSample::YUV420 + || chroma_subsampling == YuvChromaSample::YUV422 + { + if x + 1 < width as usize { + let y_value = 
(y_plane[y_offset + x + 1] as i32 - bias_y) * y_coef; + + let r = ((y_value + cr_coef * cr_value) >> 6).min(255).max(0); + let b = ((y_value + cb_coef * cb_value) >> 6).min(255).max(0); + let g = ((y_value - g_coef_1 * cr_value - g_coef_2 * cb_value) >> 6) + .min(255) + .max(0); + + let next_px = (x + 1) * channels; + + let rgba_shift = rgba_offset + next_px; + + rgba[rgba_shift + destination_channels.get_r_channel_offset()] = r as u8; + rgba[rgba_shift + destination_channels.get_g_channel_offset()] = g as u8; + rgba[rgba_shift + destination_channels.get_b_channel_offset()] = b as u8; + if destination_channels.has_alpha() { + rgba[rgba_shift + destination_channels.get_a_channel_offset()] = 255; + } + } + } + + uv_x += 1; + } + + y_offset += y_stride as usize; + rgba_offset += rgba_stride as usize; + match chroma_subsampling { + YuvChromaSample::YUV420 => { + if y & 1 == 1 { + u_offset += u_stride as usize; + v_offset += v_stride as usize; + } + } + YuvChromaSample::YUV444 | YuvChromaSample::YUV422 => { + u_offset += u_stride as usize; + v_offset += v_stride as usize; + } + } + } +} + +/// Convert YUV 420 format to RGB format. +/// +/// This function takes YUV 420 planar format data with 8-bit precision, +/// and converts it to RGB format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice to load the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice to load the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgb_data` - A mutable slice to store the converted RGB data. +/// * `range` - The YUV range (limited or full). 
+/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. +/// +pub fn yuv420_to_rgb( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgb: &mut [u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSample::YUV420 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 420 format to RGBA format. +/// +/// This function takes YUV 420 planar format data with 8-bit precision, +/// and converts it to RGBA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice to load the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice to load the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgba_data` - A mutable slice to store the converted RGBA data. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// +pub fn yuv420_to_rgba( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgba: &mut [u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSample::YUV420 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 420 format to BGRA format. +/// +/// This function takes YUV 420 planar format data with 8-bit precision, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice to load the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice to load the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice to load the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra_data` - A mutable slice to store the converted BGRA data. +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the input BGRA data are not valid based +/// on the specified width, height, and strides, or if invalid YUV range or matrix is provided. 
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Bgra` and `YuvChromaSample::YUV420` as const generic arguments. +pub fn yuv420_to_bgra( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV420 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 422 format to RGB format. +/// +/// This function takes YUV 422 data with 8-bit precision, +/// and converts it to RGB format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice containing the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice containing the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice containing the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgb` - A mutable slice to store the converted RGB data; `rgb_stride` is the stride forwarded with it (presumably bytes per row, like the plane strides). +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the output RGB buffer are not valid based +/// on the specified width, height, and strides, or if an invalid YUV range or matrix is provided.
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Rgb` and `YuvChromaSample::YUV422` as const generic arguments. +pub fn yuv422_to_rgb( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgb: &mut [u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSample::YUV422 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 422 format to RGBA format. +/// +/// This function takes YUV 422 data with 8-bit precision, +/// and converts it to RGBA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice containing the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice containing the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice containing the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgba` - A mutable slice to store the converted RGBA data; `rgba_stride` is the stride forwarded with it (presumably bytes per row, like the plane strides). +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the output RGBA buffer are not valid based +/// on the specified width, height, and strides, or if an invalid YUV range or matrix is provided.
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Rgba` and `YuvChromaSample::YUV422` as const generic arguments. +pub fn yuv422_to_rgba( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgba: &mut [u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSample::YUV422 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 422 format to BGRA format. +/// +/// This function takes YUV 422 data with 8-bit precision, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice containing the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice containing the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice containing the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra` - A mutable slice to store the converted BGRA data; `bgra_stride` is the stride forwarded with it (presumably bytes per row, like the plane strides). +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the output BGRA buffer are not valid based +/// on the specified width, height, and strides, or if an invalid YUV range or matrix is provided.
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Bgra` and `YuvChromaSample::YUV422` as const generic arguments. +pub fn yuv422_to_bgra( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV422 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 444 format to RGBA format. +/// +/// This function takes YUV 444 data with 8-bit precision, +/// and converts it to RGBA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice containing the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice containing the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice containing the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgba` - A mutable slice to store the converted RGBA data; `rgba_stride` is the stride forwarded with it (presumably bytes per row, like the plane strides). +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the output RGBA buffer are not valid based +/// on the specified width, height, and strides, or if an invalid YUV range or matrix is provided.
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Rgba` and `YuvChromaSample::YUV444` as const generic arguments. +pub fn yuv444_to_rgba( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgba: &mut [u8], + rgba_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Rgba as u8 }, { YuvChromaSample::YUV444 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgba, + rgba_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 444 format to BGRA format. +/// +/// This function takes YUV 444 data with 8-bit precision, +/// and converts it to BGRA format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice containing the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice containing the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice containing the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `bgra` - A mutable slice to store the converted BGRA data; `bgra_stride` is the stride forwarded with it (presumably bytes per row, like the plane strides). +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the output BGRA buffer are not valid based +/// on the specified width, height, and strides, or if an invalid YUV range or matrix is provided.
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Bgra` and `YuvChromaSample::YUV444` as const generic arguments. +pub fn yuv444_to_bgra( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + bgra: &mut [u8], + bgra_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Bgra as u8 }, { YuvChromaSample::YUV444 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + bgra, + bgra_stride, + width, + height, + range, + matrix, + ) +} + +/// Convert YUV 444 format to RGB format. +/// +/// This function takes YUV 444 data with 8-bit precision, +/// and converts it to RGB format with 8-bit per channel precision. +/// +/// # Arguments +/// +/// * `y_plane` - A slice containing the Y (luminance) plane data. +/// * `y_stride` - The stride (bytes per row) for the Y plane. +/// * `u_plane` - A slice containing the U (chrominance) plane data. +/// * `u_stride` - The stride (bytes per row) for the U plane. +/// * `v_plane` - A slice containing the V (chrominance) plane data. +/// * `v_stride` - The stride (bytes per row) for the V plane. +/// * `width` - The width of the YUV image. +/// * `height` - The height of the YUV image. +/// * `rgb` - A mutable slice to store the converted RGB data; `rgb_stride` is the stride forwarded with it (presumably bytes per row, like the plane strides). +/// * `range` - The YUV range (limited or full). +/// * `matrix` - The YUV standard matrix (BT.601 or BT.709 or BT.2020 or other). +/// +/// # Panics +/// +/// This function panics if the lengths of the planes or the output RGB buffer are not valid based +/// on the specified width, height, and strides, or if an invalid YUV range or matrix is provided.
+/// This wrapper performs no checks itself; any validation is left to `yuv_to_rgbx`, to which every argument is forwarded unchanged with `YuvSourceChannels::Rgb` and `YuvChromaSample::YUV444` as const generic arguments. +pub fn yuv444_to_rgb( + y_plane: &[u8], + y_stride: u32, + u_plane: &[u8], + u_stride: u32, + v_plane: &[u8], + v_stride: u32, + rgb: &mut [u8], + rgb_stride: u32, + width: u32, + height: u32, + range: YuvRange, + matrix: YuvStandardMatrix, +) { + yuv_to_rgbx::<{ YuvSourceChannels::Rgb as u8 }, { YuvChromaSample::YUV444 as u8 }>( + y_plane, + y_stride, + u_plane, + u_stride, + v_plane, + v_stride, + rgb, + rgb_stride, + width, + height, + range, + matrix, + ) +}