#9109: Add q_id to Eltwise binary EQ (#9177)
#9109: Add q_id to binary EQ in ttlib and test int output
KalaivaniMCW authored Jun 5, 2024
1 parent 792c2e4 commit 696dc36
Showing 5 changed files with 221 additions and 13 deletions.
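For orientation before the diffs: this commit lets callers pass a command-queue id (`cq_id`) as the first positional argument to `tt_lib.tensor.eq`, optionally writing into a preallocated output tensor. The sketch below condenses the new unit test added in this commit; the `device` fixture and the `data_gen_with_range` helper come from that test file, and the sketch is illustrative rather than part of the commit itself.

import torch
import tt_lib
from tests.tt_eager.python_api_testing.unit_testing.backward_ops.utility_funcs import data_gen_with_range


def eq_with_queue_id_sketch(device):
    # BFLOAT16 inputs plus a preallocated output tensor, as in the new test.
    shape = torch.Size([1, 1, 32, 32])
    in_data, input_tensor = data_gen_with_range(shape, -100, 100, device, True)
    other_data, other_tensor = data_gen_with_range(shape, -90, 100, device, True)
    _, out_tensor = data_gen_with_range(shape, -70, 60, device, True)

    cq_id = 0  # default command queue
    # New overload added by this commit: the queue id is the first positional argument.
    tt_lib.tensor.eq(cq_id, input_tensor, other_tensor, output_tensor=out_tensor)

    # Golden reference, compared against the device result via compare_pcc in the test.
    return torch.eq(in_data, other_data), out_tensor
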
@@ -117,12 +117,14 @@ def test_run_eltwise_binary_bias_ops(
)

@pytest.mark.parametrize("cmp_kind", ["lt", "gt", "lte", "gte", "ne", "eq"])
@pytest.mark.parametrize("pass_queue_id", [True, False])
def test_run_eltwise_binary_cmp_ops(
self,
input_shapes,
input_mem_config,
cmp_kind,
device,
pass_queue_id,
function_level_defaults,
):
datagen_func = [
@@ -135,8 +137,16 @@ def test_run_eltwise_binary_cmp_ops(
test_args.update(
{
"input_mem_config": [input_mem_config, input_mem_config, input_mem_config],
"queue_id": "skip",
}
)
if cmp_kind == "eq":
test_args.update(
{
"queue_id": pass_queue_id,
}
)

comparison_func = comparison_funcs.comp_equal
run_single_pytorch_test(
f"eltwise-{cmp_kind}-optional",
27 changes: 26 additions & 1 deletion tests/tt_eager/python_api_testing/sweep_tests/tt_lib_ops.py
@@ -2454,6 +2454,7 @@ def binary_op(
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])

ttl_tensor_binop(t0, t1, output_tensor=t2)

return tt2torch_tensor(t2)
@@ -2467,7 +2468,6 @@ def binary_op(
eltwise_bias_gelu_optional = make_binary_op_optional_output(ttl.tensor.bias_gelu)
eltwise_squared_difference_optional = make_binary_op_optional_output(ttl.tensor.squared_difference)
eltwise_ne_optional = make_binary_op_optional_output(ttl.tensor.ne)
eltwise_eq_optional = make_binary_op_optional_output(ttl.tensor.eq)
eltwise_gt_optional = make_binary_op_optional_output(ttl.tensor.gt)
eltwise_lt_optional = make_binary_op_optional_output(ttl.tensor.lt)
eltwise_gte_optional = make_binary_op_optional_output(ttl.tensor.gte)
@@ -2479,6 +2479,31 @@ def binary_op(
eltwise_logical_or_optional = make_binary_op_optional_output(ttl.tensor.logical_or)


def eltwise_eq_optional(
x,
y,
z,
*args,
device,
dtype,
layout,
input_mem_config,
queue_id,
**kwargs,
):
cq_id = 0
t0 = setup_tt_tensor(x, device, layout[0], input_mem_config[0], dtype[0])
t1 = setup_tt_tensor(y, device, layout[1], input_mem_config[1], dtype[1])
t2 = setup_tt_tensor(z, device, layout[2], input_mem_config[2], dtype[2])

    if queue_id:
ttl.tensor.eq(cq_id, t0, t1, output_tensor=t2)
else:
ttl.tensor.eq(t0, t1, output_tensor=t2)

return tt2torch_tensor(t2)


################################################
#################### Tensor ####################
################################################
@@ -0,0 +1,72 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import torch
import pytest
import tt_lib
from tests.tt_eager.python_api_testing.unit_testing.backward_ops.utility_funcs import data_gen_with_range, compare_pcc
from models.utility_functions import is_grayskull


@pytest.mark.parametrize(
"input_shapes",
((torch.Size([1, 1, 32, 32])),),
)
@pytest.mark.parametrize(
"mem_configs",
(
tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.DRAM),
tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1),
),
)
@pytest.mark.parametrize("out_dtype", (tt_lib.tensor.DataType.UINT32, tt_lib.tensor.DataType.UINT16))
def test_binary_eq(input_shapes, out_dtype, mem_configs, device):
if is_grayskull():
pytest.skip("GS does not support fp32/uint32/uint16 data types")

in_data, input_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
other_data, other_tensor = data_gen_with_range(input_shapes, -90, 100, device, True)

cq_id = 0
mem_cfg = mem_configs

tt_output_tensor_on_device = tt_lib.tensor.eq(
cq_id, input_tensor, other_tensor, output_mem_config=mem_cfg, output_dtype=out_dtype
)

golden_tensor = torch.eq(in_data, other_data)
comp_pass = compare_pcc([tt_output_tensor_on_device], [golden_tensor])
assert comp_pass


@pytest.mark.parametrize(
"input_shapes",
((torch.Size([1, 1, 32, 32])),),
)
@pytest.mark.parametrize(
"mem_configs",
(
tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.DRAM),
tt_lib.tensor.MemoryConfig(tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1),
),
)
@pytest.mark.parametrize("out_dtype", (tt_lib.tensor.DataType.UINT32, tt_lib.tensor.DataType.UINT16))
def test_bw_binary_eq_opt_output(input_shapes, device, mem_configs, out_dtype):
if is_grayskull():
pytest.skip("GS does not support fp32/uint32/uint16 data types")

in_data, input_tensor = data_gen_with_range(input_shapes, -100, 100, device, True)
other_data, other_tensor = data_gen_with_range(input_shapes, -90, 100, device, True)
_, out_tensor = data_gen_with_range(input_shapes, -70, 60, device, True)

cq_id = 0
mem_cfg = mem_configs

tt_lib.tensor.typecast(out_tensor, out_dtype, output_mem_config=mem_cfg)

tt_lib.tensor.eq(cq_id, input_tensor, other_tensor, output_mem_config=mem_cfg, output_tensor=out_tensor)

golden_tensor = torch.eq(in_data, other_data)
comp_pass = compare_pcc([out_tensor], [golden_tensor])
assert comp_pass
104 changes: 93 additions & 11 deletions tt_eager/tt_dnn/op_library/eltwise_binary/eltwise_binary_op.hpp
@@ -107,19 +107,65 @@ struct EltwiseBinary {
const operation::Hash compute_program_hash(const std::vector<Tensor> &input_tensors) const;
};

template <BinaryOpType binary_op_type>
struct make_eltwise_binary {
Tensor operator()(
const Tensor &input_tensor_a,
const Tensor &input_tensor_b,
std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<const DataType> output_dtype = std::nullopt,
std::optional<Tensor> output_tensor = std::nullopt) const {
inline Tensor run_eltwise_binary(
uint8_t queue_id,
const Tensor &input_tensor_a,
const Tensor &input_tensor_b,
std::optional<std::vector<UnaryWithParam>> fused_activations,
const MemoryConfig &output_mem_config,
std::optional<const DataType> output_dtype,
std::optional<Tensor> output_tensor,
BinaryOpType binary_op_type) {
std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({input_tensor_a, input_tensor_b}))};
operation::launch_op(
[fused_activations, output_mem_config, output_dtype, output_tensor, queue_id, binary_op_type] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
Tensor in_a = input_tensors.at(0);
Tensor in_b = input_tensors.at(1);
Shape shape_a = in_a.get_legacy_shape();
Shape shape_b = in_b.get_legacy_shape();
if (shape_a[0] != shape_b[0])
{
if (shape_a[0] > shape_b[0])
{
Shape shape ({shape_a[0],1,1,1});
in_b = repeat(in_b, shape, output_mem_config);
}
else
{
Shape shape ({shape_b[0],1,1,1});
in_a = repeat(in_a, shape, output_mem_config);
}
}
TT_FATAL(
(in_a.get_legacy_shape() == in_b.get_legacy_shape()) or
(in_a.get_legacy_shape().without_padding() == in_b.get_legacy_shape().without_padding()),
"Input shapes must be the same!");

auto output_tensors = operation::run(
EltwiseBinary{
binary_op_type,
fused_activations,
output_mem_config,
output_dtype.value_or(in_a.get_dtype()),
false /*in place*/},
{in_a, in_b}, {}, {output_tensor}, queue_id);
return output_tensors;
},
{input_tensor_a, input_tensor_b}, output_tensors, {}, {output_tensor});
return output_tensors.at(0);
}

inline Tensor run_eltwise_binary(
const Tensor &input_tensor_a,
const Tensor &input_tensor_b,
std::optional<std::vector<UnaryWithParam>> fused_activations,
const MemoryConfig &output_mem_config,
std::optional<const DataType> output_dtype,
std::optional<Tensor> output_tensor,
BinaryOpType binary_op_type) {
std::vector<Tensor> output_tensors = {Tensor(operation::get_workers_for_op_output({input_tensor_a, input_tensor_b}))};
operation::launch_op(
[fused_activations, output_mem_config, output_dtype, output_tensor] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
[fused_activations, output_mem_config, output_dtype, output_tensor, binary_op_type] (const std::vector<Tensor>& input_tensors, const std::vector<std::optional<const Tensor>>& optional_input_tensors, const std::vector<std::optional<Tensor>>& optional_output_tensors) mutable -> std::vector<Tensor> {
Tensor in_a = input_tensors.at(0);
Tensor in_b = input_tensors.at(1);
Shape shape_a = in_a.get_legacy_shape();
@@ -154,9 +200,46 @@ struct make_eltwise_binary {
},
{input_tensor_a, input_tensor_b}, output_tensors, {}, {output_tensor});
return output_tensors.at(0);
}

template <BinaryOpType binary_op_type>
struct make_eltwise_binary {
Tensor operator()(
const Tensor &input_tensor_a,
const Tensor &input_tensor_b,
std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<const DataType> output_dtype = std::nullopt,
std::optional<Tensor> output_tensor = std::nullopt) const {
return run_eltwise_binary(
input_tensor_a, input_tensor_b, fused_activations, output_mem_config, output_dtype, output_tensor, binary_op_type
);
}
};

inline Tensor eq(
const Tensor &input_tensor_a,
const Tensor &input_tensor_b,
std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<const DataType> output_dtype = std::nullopt,
std::optional<Tensor> output_tensor = std::nullopt) {
return run_eltwise_binary(
input_tensor_a, input_tensor_b, fused_activations, output_mem_config, output_dtype, output_tensor, BinaryOpType::EQ);
}

inline Tensor eq(
uint8_t queue_id,
const Tensor &input_tensor_a,
const Tensor &input_tensor_b,
std::optional<std::vector<UnaryWithParam>> fused_activations = std::nullopt,
const MemoryConfig &output_mem_config = operation::DEFAULT_OUTPUT_MEMORY_CONFIG,
std::optional<const DataType> output_dtype = std::nullopt,
std::optional<Tensor> output_tensor = std::nullopt) {
return run_eltwise_binary(
queue_id, input_tensor_a, input_tensor_b, fused_activations, output_mem_config, output_dtype, output_tensor, BinaryOpType::EQ);
}

// arithmetic binary ops
constexpr auto add = make_eltwise_binary<BinaryOpType::ADD>{};
constexpr auto sub = make_eltwise_binary<BinaryOpType::SUB>{};
@@ -173,7 +256,6 @@ constexpr auto lt = make_eltwise_binary<BinaryOpType::LT>{};
constexpr auto gt = make_eltwise_binary<BinaryOpType::GT>{};
constexpr auto lte = make_eltwise_binary<BinaryOpType::LTE>{};
constexpr auto gte = make_eltwise_binary<BinaryOpType::GTE>{};
constexpr auto eq = make_eltwise_binary<BinaryOpType::EQ>{};
constexpr auto ne = make_eltwise_binary<BinaryOpType::NE>{};

// logical ops
21 changes: 20 additions & 1 deletion tt_eager/tt_lib/csrc/tt_lib_bindings_tensor_xary_ops.cpp
@@ -23,13 +23,32 @@ namespace tt::tt_metal::detail {
detail::bind_binary_op(m_tensor, "lt", lt, R"doc(Perform an eltwise-binary less-than (``{0} < {1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "lte", lte, R"doc(Perform an eltwise-binary less-than-or-equal (``{0} <= {1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "gte", gte, R"doc(Perform an eltwise-binary greater-than-or-equal (``{0} >= {1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "eq", eq, R"doc(Perform an eltwise-binary equal (``{0} == {1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "eq", py::overload_cast<const Tensor&, const Tensor&, std::optional<std::vector<UnaryWithParam>>, const MemoryConfig&, std::optional<const DataType>, std::optional<Tensor> >(&eq), R"doc(Perform an eltwise-binary equal (``{0} == {1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "ne", ne, R"doc(Perform an eltwise-binary not-equal (``{0} != {1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "ldexp", ldexp, R"doc(Performs eltwise-binary ldexp (``{0} * 2**{1}``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "logaddexp", logaddexp, R"doc(Perform an eltwise-binary logaddexp (``log(exp({0}) + exp({1}))``) on two tensors.)doc");
detail::bind_binary_op(m_tensor, "logaddexp2", logaddexp2, R"doc(Perform an eltwise-binary logaddexp2 (``log2(2^({0}) + 2^({1}))``) on two tensors for input range [-64,64].)doc");
detail::bind_binary_op(m_tensor, "logical_or", logical_or, R"doc(Perform an eltwise-binary logical OR (``{0} || {1}``) on two tensors.)doc");

m_tensor.def("eq", py::overload_cast<uint8_t, const Tensor&, const Tensor&, std::optional<std::vector<UnaryWithParam>>, const MemoryConfig&, std::optional<const DataType>, std::optional<Tensor> >(&eq),
py::arg("queue_id").noconvert() = 0, py::arg("input_a").noconvert(), py::arg("input_b").noconvert(), py::arg("fused_activations").noconvert() = std::nullopt, py::arg("output_mem_config").noconvert() = operation::DEFAULT_OUTPUT_MEMORY_CONFIG, py::arg("output_dtype").noconvert()= std::nullopt, py::arg("output_tensor").noconvert()= std::nullopt, R"doc(
Perform an eltwise-binary equal (``input_a`` == ``input_b``) on two tensors.
        Input tensors must have BFLOAT16 data type.
        Output tensors will have BFLOAT16 data type unless ``output_dtype`` is specified.
.. csv-table::
:header: "Argument", "Description", "Data type", "Valid range", "Required"
"queue_id", "queue_id", "uint8_t", "Default is 0", "No"
"input_a", "Tensor add is applied to", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
"input_b", "Tensor", "Tensor", "Tensor of shape [W, Z, Y, X]", "Yes"
"fused_activations", "Fused activations after binary computation", "List of FusibleActivation with optional param", "Default is None", "No"
"output_mem_config", "Layout of tensor in TT Accelerator device memory banks", "MemoryConfig", "Default is interleaved in DRAM", "No"
"output_dtype", "Output tensor data type", "DataType", "Default is None (Use input dtype)", "No"
"output_tensor", "Optional output tensor", "Tensor", "Default is None", "No"
)doc");
// *** eltwise unary ***
detail::bind_unary_op(m_tensor, "identity", identity, R"doc(Returns a copy of same tensor ``input``; useful for profiling the SFPU.
this shouldn't normally be used; users should normally use clone operation instead for same functionality as this would be lower performance.
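As a usage note for the ``eq`` binding documented above, the queue-id overload also accepts the documented keyword arguments, so an integer-typed result can be requested directly. A minimal sketch, continuing from the earlier example: the memory config and dtype values mirror the new unit test, and `input_tensor` / `other_tensor` are assumed to be device tensors created as above.

# Request a UINT16 result in L1, as exercised by the new unit test.
mem_cfg = tt_lib.tensor.MemoryConfig(
    tt_lib.tensor.TensorMemoryLayout.INTERLEAVED, tt_lib.tensor.BufferType.L1
)
result = tt_lib.tensor.eq(
    0,  # queue_id; defaults to 0 per the binding
    input_tensor,
    other_tensor,
    output_mem_config=mem_cfg,
    output_dtype=tt_lib.tensor.DataType.UINT16,
)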
