From 696dc366e66faec94bec2a2835bc421289c6fa17 Mon Sep 17 00:00:00 2001
From: Kalaivani Baskar <156762498+KalaivaniMCW@users.noreply.github.com>
Date: Thu, 6 Jun 2024 03:55:35 +0530
Subject: [PATCH] #9109: Add q_id to Eltwise binary EQ (#9177)

#9109: Add q_id to binary EQ in ttlib and test int output
---
 .../test_eltwise_binary_optional_output.py    |  10 ++
 .../sweep_tests/tt_lib_ops.py                 |  27 ++++-
 .../unit_testing/misc/test_binary_eq_int.py   |  72 ++++++++++++
 .../eltwise_binary/eltwise_binary_op.hpp      | 104 ++++++++++++++++--
 .../csrc/tt_lib_bindings_tensor_xary_ops.cpp  |  21 +++-
 5 files changed, 221 insertions(+), 13 deletions(-)
 create mode 100644 tests/tt_eager/python_api_testing/unit_testing/misc/test_binary_eq_int.py

diff --git a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_binary_optional_output.py b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_binary_optional_output.py
index 9d90b45a9c6..dccf0727b50 100644
--- a/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_binary_optional_output.py
+++ b/tests/tt_eager/python_api_testing/sweep_tests/pytests/tt_dnn/test_eltwise_binary_optional_output.py
@@ -117,12 +117,14 @@ def test_run_eltwise_binary_bias_ops(
     )
 
     @pytest.mark.parametrize("cmp_kind", ["lt", "gt", "lte", "gte", "ne", "eq"])
+    @pytest.mark.parametrize("pass_queue_id", [True, False])
     def test_run_eltwise_binary_cmp_ops(
         self,
         input_shapes,
         input_mem_config,
         cmp_kind,
         device,
+        pass_queue_id,
         function_level_defaults,
     ):
         datagen_func = [
@@ -135,8 +137,16 @@ def test_run_eltwise_binary_cmp_ops(
         test_args.update(
             {
                 "input_mem_config": [input_mem_config, input_mem_config, input_mem_config],
+                "queue_id": "skip",
             }
         )
+        if cmp_kind == "eq":
+            test_args.update(
+                {
+                    "queue_id": pass_queue_id,
+                }
+            )
+
         comparison_func = comparison_funcs.comp_equal
         run_single_pytorch_test(
             f"eltwise-{cmp_kind}-optional",

diff --git a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
index 7bc24aae053..3d64d42e2d6 100644
--- a/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
+++ b/tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
@@ -2454,6 +2454,7 @@ def binary_op(
         t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
         t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
         t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])
+
         ttl_tensor_binop(t0, t1, output_tensor=t2)
 
         return tt2torch_tensor(t2)
@@ -2467,7 +2468,6 @@ def binary_op(
 eltwise_bias_gelu_optional = make_binary_op_optional_output(ttl.tensor.bias_gelu)
 eltwise_squared_difference_optional = make_binary_op_optional_output(ttl.tensor.squared_difference)
 eltwise_ne_optional = make_binary_op_optional_output(ttl.tensor.ne)
-eltwise_eq_optional = make_binary_op_optional_output(ttl.tensor.eq)
 eltwise_gt_optional = make_binary_op_optional_output(ttl.tensor.gt)
 eltwise_lt_optional = make_binary_op_optional_output(ttl.tensor.lt)
 eltwise_gte_optional = make_binary_op_optional_output(ttl.tensor.gte)
@@ -2479,6 +2479,31 @@ def binary_op(
 eltwise_logical_or_optional = make_binary_op_optional_output(ttl.tensor.logical_or)
 
 
+def eltwise_eq_optional(
+    x,
+    y,
+    z,
+    *args,
+    device,
+    dtype,
+    layout,
+    input_mem_config,
+    queue_id,
+    **kwargs,
+):
+    cq_id = 0
+    t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
+    t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
+    t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])
+
+    if queue_id == True:
+        ttl.tensor.eq(cq_id, t0, t1, output_tensor=t2)
+    else:
+        ttl.tensor.eq(t0, t1, output_tensor=t2)
+
+    return tt2torch_tensor(t2)
+
+
 ################################################
 #################### Tensor ####################
 ################################################

diff --git a/tests/tt_eager/python_api_testing/unit_testing/misc/test_binary_eq_int.py b/tests/tt_eager/python_api_testing/unit_testing/misc/test_binary_eq_int.py
new file mode 100644
index 00000000000..918b20556cf
--- /dev/null
+++ b/tests/tt_eager/python_api_testing/unit_testing/misc/test_binary_eq_int.py
@@ -0,0 +1,72 @@
+# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.
+
+# SPDX-License-Identifier: Apache-2.0
+
+import torch
+import pytest
+import tt_lib
+from tests.tt_eager.python_api_testing.unit_testing.backward_ops.utility_funcs import data_gen_with_range, compare_pcc
+from models.utility_functions import is_grayskull
+
+
+@pytest.mark.parametrize(
+    "input_shapes",
+    ((torch.Size([1, 1, 32, 32])),),
+)
+@pytest.mark.parametrize(
+    "mem_configs",
+    (
+        tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.DRAM),
+        tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1),
+    ),
+)
+@pytest.mark.parametrize("out_dtype", (tt_lib.tensor.DataType.UINT32, tt_lib.tensor.DataType.UINT16))
+def test_binary_eq(input_shapes, out_dtype, mem_configs, device):
+    if is_grayskull():
+        pytest.skip("GS does not support fp32/uint32/uint16 data types")
+
+    in_data, input_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
+    other_data, other_tensor = data_gen_with_range(input_shapes, -90, 100, device, True)
+
+    cq_id = 0
+    mem_cfg = mem_configs
+
+    tt_output_tensor_on_device = tt_lib.tensor.eq(
+        cq_id, input_tensor, other_tensor, output_mem_config=mem_cfg, output_dtype=out_dtype
+    )
+
+    golden_tensor = torch.eq(in_data, other_data)
+    comp_pass = compare_pcc([tt_output_tensor_on_device], [golden_tensor])
+    assert comp_pass
+
+
+@pytest.mark.parametrize(
+    "input_shapes",
+    ((torch.Size([1, 1, 32, 32])),),
+)
+@pytest.mark.parametrize(
+    "mem_configs",
+    (
+        tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.DRAM),
+        tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1),
+    ),
+)
+@pytest.mark.parametrize("out_dtype", (tt_lib.tensor.DataType.UINT32, tt_lib.tensor.DataType.UINT16))
+def test_bw_binary_eq_opt_output(input_shapes, device, mem_configs, out_dtype):
+    if is_grayskull():
+        pytest.skip("GS does not support fp32/uint32/uint16 data types")
+
+    in_data, input_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
+    other_data, other_tensor = data_gen_with_range(input_shapes, -90, 100, device, True)
+    _, out_tensor = data_gen_with_range(input_shapes, -70, 60, device, True)
+
+    cq_id = 0
+    mem_cfg = mem_configs
+
+    tt_lib.tensor.typecast(out_tensor, out_dtype, output_mem_config=mem_cfg)
+
+    tt_lib.tensor.eq(cq_id, input_tensor, other_tensor, output_mem_config=mem_cfg, output_tensor=out_tensor)
+
+    golden_tensor = torch.eq(in_data, other_data)
+    comp_pass = compare_pcc([out_tensor], [golden_tensor])
+    assert comp_pass

diff --git a/tt_eager/tt_dnn/op_library/eltwise_binary/eltwise_binary_op.hpp b/tt_eager/tt_dnn/op_library/eltwise_binary/eltwise_binary_op.hpp
index d69e84c3265..6cf3624eec7 100644
--- a/tt_eager/tt_dnn/op_library/eltwise_binary/eltwise_binary_op.hpp
+++ b/tt_eager/tt_dnn/op_library/eltwise_binary/eltwise_binary_op.hpp
@@ -107,19 +107,65 @@ struct EltwiseBinary {
     const operation::Hash compute_program_hash(const std::vector<Tensor> &input_tensors) const;
 };
 
-template <BinaryOpType binary_op_type>
-struct make_eltwise_binary {
-    Tensor operator()(
-        const Tensor &input_tensor_a,
-        const Tensor &input_tensor_b,
-        std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
-        const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
-        std::optional<const DataType> output_dtype = std::nullopt,
-        std::optional<Tensor> output_tensor = std::nullopt) const {
+inline Tensor run_eltwise_binary(
+    uint8_t queue_id,
+    const Tensor &input_tensor_a,
+    const Tensor &input_tensor_b,
+    std::optional<std::vector<UnaryWithParam>> fused_activations,
+    const MemoryConfig &output_mem_config,
+    std::optional<const DataType> output_dtype,
+    std::optional<Tensor> output_tensor,
+    BinaryOpType binary_op_type) {
     std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({input_tensor_a, input_tensor_b}))};
+    operation::launch_op(
+        [fused_activations, output_mem_config, output_dtype, output_tensor, queue_id, binary_op_type] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
+            Tensor in_a = input_tensors.at(0);
+            Tensor in_b = input_tensors.at(1);
+            Shape shape_a = in_a.get_legacy_shape();
+            Shape shape_b = in_b.get_legacy_shape();
+            if (shape_a[0] != shape_b[0])
+            {
+                if (shape_a[0] > shape_b[0])
+                {
+                    Shape shape ({shape_a[0],1,1,1});
+                    in_b = repeat(in_b, shape, output_mem_config);
+                }
+                else
+                {
+                    Shape shape ({shape_b[0],1,1,1});
+                    in_a = repeat(in_a, shape, output_mem_config);
+                }
+            }
+            TT_FATAL(
+                (in_a.get_legacy_shape() == in_b.get_legacy_shape()) or
+                (in_a.get_legacy_shape().without_padding() == in_b.get_legacy_shape().without_padding()),
+                "Input shapes must be the same!");
+
+            auto output_tensors = operation::run(
+                EltwiseBinary{
+                    binary_op_type,
+                    fused_activations,
+                    output_mem_config,
+                    output_dtype.value_or(in_a.get_dtype()),
+                    false /*in place*/},
+                {in_a, in_b}, {}, {output_tensor}, queue_id);
+            return output_tensors;
+        },
+        {input_tensor_a, input_tensor_b}, output_tensors, {}, {output_tensor});
+    return output_tensors.at(0);
+}
 
+inline Tensor run_eltwise_binary(
+    const Tensor &input_tensor_a,
+    const Tensor &input_tensor_b,
+    std::optional<std::vector<UnaryWithParam>> fused_activations,
+    const MemoryConfig &output_mem_config,
+    std::optional<const DataType> output_dtype,
+    std::optional<Tensor> output_tensor,
+    BinaryOpType binary_op_type) {
+    std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({input_tensor_a, input_tensor_b}))};
     operation::launch_op(
-        [fused_activations, output_mem_config, output_dtype, output_tensor] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
+        [fused_activations, output_mem_config, output_dtype, output_tensor, binary_op_type] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
             Tensor in_a = input_tensors.at(0);
             Tensor in_b = input_tensors.at(1);
             Shape shape_a = in_a.get_legacy_shape();
@@ -154,9 +200,46 @@ struct make_eltwise_binary {
         },
         {input_tensor_a, input_tensor_b}, output_tensors, {}, {output_tensor});
     return output_tensors.at(0);
+}
+
+template <BinaryOpType binary_op_type>
+struct make_eltwise_binary {
+    Tensor operator()(
+        const Tensor &input_tensor_a,
+        const Tensor &input_tensor_b,
+        std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
+        const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
+        std::optional<const DataType> output_dtype = std::nullopt,
+        std::optional<Tensor> output_tensor = std::nullopt) const {
+        return run_eltwise_binary(
+            input_tensor_a, input_tensor_b, fused_activations, output_mem_config, output_dtype, output_tensor, binary_op_type
+        );
     }
 };
 
+inline Tensor eq(
+    const Tensor &input_tensor_a,
+    const Tensor &input_tensor_b,
+    std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
+    const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
+    std::optional<const DataType> output_dtype = std::nullopt,
+    std::optional<Tensor> output_tensor = std::nullopt) {
+    return run_eltwise_binary(
+        input_tensor_a, input_tensor_b, fused_activations, output_mem_config, output_dtype, output_tensor, BinaryOpType::EQ);
+}
+
+inline Tensor eq(
+    uint8_t queue_id,
+    const Tensor &input_tensor_a,
+    const Tensor &input_tensor_b,
+    std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
+    const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
+    std::optional<const DataType> output_dtype = std::nullopt,
+    std::optional<Tensor> output_tensor = std::nullopt) {
+    return run_eltwise_binary(
+        queue_id, input_tensor_a, input_tensor_b, fused_activations, output_mem_config, output_dtype, output_tensor, BinaryOpType::EQ);
+}
+
 // arithmetic binary ops
 constexpr auto add = make_eltwise_binary<BinaryOpType::ADD>{};
 constexpr auto sub = make_eltwise_binary<BinaryOpType::SUB>{};
@@ -173,7 +256,6 @@ constexpr auto lt = make_eltwise_binary<BinaryOpType::LT>{};
 constexpr auto gt = make_eltwise_binary<BinaryOpType::GT>{};
 constexpr auto lte = make_eltwise_binary<BinaryOpType::LTE>{};
 constexpr auto gte = make_eltwise_binary<BinaryOpType::GTE>{};
-constexpr auto eq = make_eltwise_binary<BinaryOpType::EQ>{};
 constexpr auto ne = make_eltwise_binary<BinaryOpType::NE>{};
 
 // logical ops

diff --git a/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp b/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp
index cd38dd00aab..be48f309f62 100644
--- a/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp
+++ b/tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp
@@ -23,13 +23,32 @@ namespace tt::tt_metal::detail {
     detail::bind_binary_op(m_tensor, "lt", lt, R"doc(Perform an eltwise-binary less-than (``{0} < {1}``) on two tensors.)doc");
     detail::bind_binary_op(m_tensor, "lte", lte, R"doc(Perform an eltwise-binary less-than-or-equal (``{0} <= {1}``) on two tensors.)doc");
     detail::bind_binary_op(m_tensor, "gte", gte, R"doc(Perform an eltwise-binary greater-than-or-equal (``{0} >= {1}``) on two tensors.)doc");
-    detail::bind_binary_op(m_tensor, "eq", eq, R"doc(Perform an eltwise-binary equal (``{0} == {1}``) on two tensors.)doc");
+    detail::bind_binary_op(m_tensor, "eq", py::overload_cast<const Tensor&, const Tensor&, std::optional<std::vector<UnaryWithParam>>, const MemoryConfig&, std::optional<const DataType>, std::optional<Tensor>>(&eq), R"doc(Perform an eltwise-binary equal (``{0} == {1}``) on two tensors.)doc");
     detail::bind_binary_op(m_tensor, "ne", ne, R"doc(Perform an eltwise-binary not-equal (``{0} != {1}``) on two tensors.)doc");
     detail::bind_binary_op(m_tensor, "ldexp", ldexp, R"doc(Performs eltwise-binary ldexp (``{0} * 2**{1}``) on two tensors.)doc");
     detail::bind_binary_op(m_tensor, "logaddexp", logaddexp, R"doc(Perform an eltwise-binary logaddexp (``log(exp({0}) + exp({1}))``) on two tensors.)doc");
     detail::bind_binary_op(m_tensor, "logaddexp2", logaddexp2, R"doc(Perform an eltwise-binary logaddexp2 (``log2(2^({0}) + 2^({1}))``) on two tensors for input range [-64,64].)doc");
     detail::bind_binary_op(m_tensor, "logical_or", logical_or, R"doc(Perform an eltwise-binary logical OR (``{0} || {1}``) on two tensors.)doc");
m_tensor.def("eq", py::overload_cast>, const MemoryConfig&, std::optional, std::optional >(&eq), + py::arg("queue_id").noconvert() = 0, py::arg("input_a").noconvert(), py::arg("input_b").noconvert(), py::arg("fused_activations").noconvert() = std::nullopt, py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, py::arg("output_dtype").noconvert()= std::nullopt, py::arg("output_tensor").noconvert()= std::nullopt, R"doc( + Perform an eltwise-binary equal (``input_a`` == ``input_b``) on two tensors. + + Input tensor must have BFLOAT16 data type. + + Output tensors will have BFLOAT16 data type. + + .. csv-table:: + :header: "Argument", "Description", "Data type", "Valid range", "Required" + + "queue_id", "queue_id", "uint8_t", "Default is 0", "No" + "input_a", "Tensor add is applied to", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" + "input_b", "Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes" + "fused_activations", "Fused activations after binary computation", "List of FusibleActivation with optional param", "Default is None", "No" + "output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No" + "output_dtype", "Output tensor data type", "DataType", "Default is None (Use input dtype)", "No" + "output_tensor", "Optional output tensor", "Tensor", "Default is None", "No" + )doc"); // *** eltwise unary *** detail::bind_unary_op(m_tensor, "identity", identity, R"doc(Returns a copy of same tensor ``input``; useful for profiling the SFPU. this shouldn't normally be used; users should normally use clone operation instead for same functionality as this would be lower performance.