diff --git a/docs/source/ttnn/ttnn/api.rst b/docs/source/ttnn/ttnn/api.rst index 7ac786bebb7..5c60787fd36 100644 --- a/docs/source/ttnn/ttnn/api.rst +++ b/docs/source/ttnn/ttnn/api.rst @@ -114,6 +114,7 @@ Pointwise Unary ttnn.cosh ttnn.deg2rad ttnn.digamma + ttnn.dropout ttnn.elu ttnn.eqz ttnn.erf @@ -161,6 +162,7 @@ Pointwise Unary ttnn.normalize_global ttnn.normalize_hw ttnn.polygamma + ttnn.pow ttnn.prelu ttnn.rad2deg ttnn.rdiv @@ -197,6 +199,7 @@ Pointwise Unary ttnn.tril ttnn.triu ttnn.trunc + ttnn.unary_chain ttnn.clamp_bw ttnn.clip_bw ttnn.hardtanh_bw @@ -305,7 +308,6 @@ Pointwise Binary ttnn.logical_and_ ttnn.logical_or_ ttnn.logical_xor_ - ttnn.pow ttnn.rpow ttnn.ldexp ttnn.logical_and diff --git a/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp b/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp index cc2ab8a7fd4..1ed1fe0d081 100644 --- a/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp +++ b/ttnn/cpp/ttnn/operations/eltwise/unary/unary_pybind.hpp @@ -271,19 +271,23 @@ void bind_unary_operation_overload_complex( } template -void bind_unary_operation_overload_complex_return_complex(py::module& module, const unary_operation_t& operation, const std::string& supported_dtype = "BFLOAT16", const std::string& info_doc = "" ) { +void bind_unary_operation_overload_complex_return_complex( + py::module& module, + const unary_operation_t& operation, + const std::string& supported_dtype = "BFLOAT16", + const std::string& info_doc = "" ) { auto doc = fmt::format( R"doc( Applies {0} to :attr:`input_tensor` element-wise. .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) + \mathrm{{output\_tensor}}_i = \verb|{0}|(\mathrm{{input\_tensor}}_i) Args: - input_tensor (ttnn.Tensor): the input tensor. + input_tensor (ttnn.Tensor or ComplexTensor): the input tensor. Keyword Args: - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`. @@ -306,7 +310,7 @@ void bind_unary_operation_overload_complex_return_complex(py::module& module, co {3} Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) >>> output = {1}(tensor) )doc", operation.base_name(), @@ -343,6 +347,7 @@ void bind_unary_operation_overload_complex_return_complex(py::module& module, co py::arg("memory_config")}); } + template void bind_unary_operation_with_fast_and_approximate_mode(py::module& module, const unary_operation_t& operation, const std::string& supported_dtype = "BFLOAT16", const std::string& info_doc = "" ) { auto doc = fmt::format( @@ -505,10 +510,10 @@ void bind_unary_operation_with_integer_parameter( Args: input_tensor (ttnn.Tensor): the input tensor. + {2} (int): {3}. Keyword Args: - {2} (int): {3}. - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`. 
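For reference, the example style these docstring hunks converge on corresponds to roughly the following end-to-end snippet (a sketch only: the ttnn.open_device / ttnn.close_device calls, the device_id, and the chosen exponent are illustrative assumptions, not part of this change):

    >>> import torch
    >>> import ttnn
    >>> device = ttnn.open_device(device_id=0)
    >>> # Tiled bfloat16 input, matching the updated docstring examples
    >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device)
    >>> # ttnn.pow is now documented under Pointwise Unary and takes an exponent parameter
    >>> output = ttnn.pow(tensor, 2)
    >>> ttnn.close_device(device)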
@@ -531,7 +536,8 @@ void bind_unary_operation_with_integer_parameter( {5} Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.int32), layout=ttnn.TILE_LAYOUT, device=device) + >>> {2} = 5 >>> output = {1}(tensor, {2}) )doc", operation.base_name(), @@ -580,14 +586,14 @@ void bind_unary_operation_with_dim_parameter( {4} .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) + \mathrm{{output\_tensor}}_i = \verb|{0}|(\mathrm{{input\_tensor}}_i) Args: input_tensor (ttnn.Tensor): the input tensor. + {2} (int): {3}. Defaults to `-1`. Keyword Args: - {2} (int): {3}. - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. Returns: ttnn.Tensor: the output tensor. @@ -608,7 +614,8 @@ void bind_unary_operation_with_dim_parameter( {6} Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.rand([1, 1, 32, 64], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) + >>> {2} = 3 >>> output = {1}(tensor, {2}) )doc", operation.base_name(), @@ -783,15 +790,28 @@ void bind_sigmoid_accurate(py::module& module, const unary_operation_t& operatio input_tensor (ttnn.Tensor): the input tensor. Keyword Args: - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`. Returns: ttnn.Tensor: the output tensor. + Note: + Supported dtypes, layouts, and ranks: + + .. list-table:: + :header-rows: 1 + + * - Dtypes + - Layouts + - Ranks + * - BFLOAT16, BFLOAT8_B + - TILE + - 2, 3, 4 + Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) >>> output = {1}(tensor) )doc", ttnn::sigmoid_accurate.base_name(), @@ -823,23 +843,37 @@ void bind_unary_chain(py::module& module, const unary_operation_t& operation) { Applies {0} to :attr:`input_tensor` element-wise. .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) + \mathrm{{output\_tensor}}_i = \verb|{0}|(\mathrm{{input\_tensor}}_i) Args: input_tensor (ttnn.Tensor): the input tensor. + ops_chain (list[ttnn.UnaryWithParam]): list of unary ops to be chained. Keyword Args: - ops_chain (list[ttnn.UnaryWithParam]): List of unary ops to be chained. - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`. Returns: ttnn.Tensor: the output tensor. + Note: + Supported dtypes, layouts, and ranks: + + .. 
list-table:: + :header-rows: 1 + + * - Dtypes + - Layouts + - Ranks + * - BFLOAT16, BFLOAT8_B + - TILE + - 2, 3, 4 + Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.randn([32, 32], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) + >>> ops_chain = [ttnn.UnaryWithParam(ttnn.UnaryOpType.RELU), ttnn.UnaryWithParam(ttnn.UnaryOpType.EXP, False), ttnn.UnaryWithParam(ttnn.UnaryOpType.POWER, 2)] >>> output = {1}(tensor, ops_chain) )doc", ttnn::unary_chain.base_name(), @@ -870,25 +904,38 @@ template void bind_identity(py::module& module, const unary_operation_t& operation) { auto doc = fmt::format( R"doc( - Returns a copy of same tensor :attr:`input_tensor`; useful for profiling the SFPU. - This shouldn't normally be used; users should normally use clone operation instead for same functionality as this would be lower performance. + Returns a copy of the :attr:`input_tensor`; useful for profiling the SFPU. + This op shouldn't normally be used; prefer the clone operation for the same functionality, since identity has lower performance. .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) + \mathrm{{output\_tensor}}_i = \verb|{0}|(\mathrm{{input\_tensor}}_i) Args: input_tensor (ttnn.Tensor): the input tensor. Keyword Args: - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`. Returns: ttnn.Tensor: the output tensor. + Note: + Supported dtypes, layouts, and ranks: + + .. list-table:: + :header-rows: 1 + + * - Dtypes + - Layouts + - Ranks + * - BFLOAT16, BFLOAT8_B, FLOAT32, UINT32, UINT16, UINT8 + - TILE + - 2, 3, 4 + Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.float16), dtype=ttnn.bfloat16, layout=ttnn.TILE_LAYOUT, device=device) >>> output = {1}(tensor) )doc", ttnn::identity.base_name(), @@ -914,21 +961,21 @@ void bind_identity(py::module& module, const unary_operation_t& operation) { } template -void bind_power(py::module& module, const unary_operation_t& operation, const std::string& supported_dtype="BFLOAT16", const std::string& info_doc = "") { +void bind_power(py::module& module, const unary_operation_t& operation, const std::string& note = "") { auto doc = fmt::format( R"doc( Applies {0} to :attr:`input_tensor` element-wise. .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) + \mathrm{{output\_tensor}}_i = \verb|{0}|(\mathrm{{input\_tensor}}_i) Args: input_tensor (ttnn.Tensor): the input tensor. exponent (float, int): the exponent value. Keyword Args: - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. - output_tensor (ttnn.Tensor, optional): Preallocated output tensor. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. + output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`.
Returns: @@ -943,20 +990,20 @@ void bind_power(py::module& module, const unary_operation_t& operation, const st * - Dtypes - Layouts - Ranks - * - {2} + * - BFLOAT16, BFLOAT8_B - TILE - 2, 3, 4 - {3} + {2} Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) + >>> exponent = 2 >>> output = {1}(tensor, exponent) )doc", ttnn::pow.base_name(), ttnn::pow.python_fully_qualified_name(), - supported_dtype, - info_doc); + note); bind_registered_operation( module, @@ -1074,7 +1121,9 @@ void bind_unary_composite_int_with_default( const std::string& parameter_name_a, const std::string& parameter_a_doc, int32_t parameter_a_value, - const std::string& description) { + const std::string& description, + const std::string& supported_dtype = "BFLOAT16", + const std::string& note = "") { auto doc = fmt::format( R"doc( {5} @@ -1084,13 +1133,28 @@ void bind_unary_composite_int_with_default( Keyword args: {2} (float): {3}. Defaults to `{4}`. - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. Returns: ttnn.Tensor: the output tensor. + Note: + Supported dtypes, layouts, and ranks: + + .. list-table:: + :header-rows: 1 + + * - Dtypes + - Layouts + - Ranks + * - {6} + - TILE + - 2, 3, 4 + + {7} + Example: - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) + >>> tensor = ttnn.from_torch(torch.rand([2, 2], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) >>> output = {1}(tensor, {2} = {4}) )doc", operation.base_name(), @@ -1098,7 +1162,9 @@ void bind_unary_composite_int_with_default( parameter_name_a, parameter_a_doc, parameter_a_value, - description); + description, + supported_dtype, + note); bind_registered_operation( module, @@ -1582,184 +1648,49 @@ void bind_unary_composite_float( py::arg("memory_config") = std::nullopt}); } -template -void bind_unary_operation_with_scale_and_shift(py::module& module, const unary_operation_t& operation) { - auto doc = fmt::format( - R"doc( - Applies {0} to :attr:`input_tensor` element-wise. - - .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) - - Args: - input_tensor (ttnn.Tensor): the input tensor. - scale (float) - shift (float) - - Keyword Args: - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. - output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. - queue_id (int, optional): command queue id. Defaults to `0`. - - Returns: - ttnn.Tensor: the output tensor. 
- - Example:: - - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) - >>> output = {1}(tensor) - )doc", - operation.base_name(), - operation.python_fully_qualified_name()); - - bind_registered_operation( - module, - operation, - doc, - ttnn::pybind_overload_t{ - [](const unary_operation_t& self, - const Tensor& input_tensor, - float scale, - float shift, - const std::optional& memory_config, - const std::optional& output_tensor, - const uint8_t& queue_id) { - return self(queue_id, input_tensor, scale, shift, memory_config, output_tensor); - }, - py::arg("input_tensor"), - py::arg("scale")=1.0f/6.0f, - py::arg("shift")=0.5f, - py::kw_only(), - py::arg("memory_config") = std::nullopt, - py::arg("output_tensor") = std::nullopt, - py::arg("queue_id") = 0}); -} template -void bind_unary_operation_with_low_and_high(py::module& module, const unary_operation_t& operation) { +void bind_dropout(py::module& module, const unary_operation_t& operation) { auto doc = fmt::format( R"doc( + Applies {0} to :attr:`input_tensor` element-wise. .. math:: - \mathrm{{output\_tensor}}_i = {0}(\mathrm{{input\_tensor}}_i) + \verb|{0}|(\mathrm{{input\_tensor}}_i) Args: input_tensor (ttnn.Tensor): the input tensor. - low (float) - high (float) Keyword Args: - memory_config (ttnn.MemoryConfig, optional): Memory configuration for the operation. Defaults to `None`. + seed (uint32_t): seed used for RNG. + probability (float): Dropout probability. On average, total_elems * probability elements will be zeroed out. + scale (float): Scales the output tensor. In general, scale = 1.0/(1.0-probability). + memory_config (ttnn.MemoryConfig, optional): memory configuration for the operation. Defaults to `None`. output_tensor (ttnn.Tensor, optional): preallocated output tensor. Defaults to `None`. queue_id (int, optional): command queue id. Defaults to `0`.
- - Example:: - - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) - >>> output = {1}(tensor) - )doc", - operation.base_name(), - operation.python_fully_qualified_name()); - - bind_registered_operation( - module, - operation, - doc, - ttnn::pybind_overload_t{ - [](const unary_operation_t& self, - const Tensor& input_tensor, - int32_t diag, - const std::optional& memory_config, - const std::optional& output_tensor, - const uint8_t& queue_id) { - return self(queue_id, input_tensor, diag, memory_config, output_tensor); }, - py::arg("input_tensor"), - py::arg("diag") = 0, - py::kw_only(), - py::arg("memory_config") = std::nullopt, - py::arg("output_tensor") = std::nullopt, - py::arg("queue_id") = 0}); -} - - -template -void bind_dropout(py::module& module, const unary_operation_t& operation) { - auto doc = fmt::format( - R"doc({0}(input_tensor: ttnn.Tensor, *, seed: uint32_t, probability: float, scale: float, memory_config: Optional[ttnn.MemoryConfig] = None) -> ttnn.Tensor - - Applies {0} to :attr:`input_tensor` element-wise. - - .. math:: - {0}(\\mathrm{{input\\_tensor}}_i) - - Args: - * :attr:`input_tensor` + Note: + Supported dtypes, layouts, and ranks: - Keyword Args: - * :attr:`seed` (uint32_t): seed used for RNG - * :attr:`probability` (float): Dropout probability. In average total_elems * probability elements will be zero out. - * :attr:`scale` (float): Scales output tensor. In general scale == 1.0/(1.0-probability) - * :attr:`memory_config` (Optional[ttnn.MemoryConfig]): Memory configuration for the operation. - * :attr:`output_tensor` (Optional[ttnn.Tensor]): preallocated output tensor - * :attr:`queue_id` (Optional[uint8]): command queue id + .. list-table:: + :header-rows: 1 - Example: + * - Dtypes + - Layouts + - Ranks + * - BFLOAT16 + - TILE + - 2, 3, 4 - >>> tensor = ttnn.from_torch(torch.tensor((1, 2), dtype=torch.bfloat16), device=device) - >>> output = {1}(tensor, seed=42, probability=0.2, scale= 1.0/(1.0 - probability)) + Example: + >>> tensor = ttnn.from_torch(torch.tensor([[1, 2], [3, 4]], dtype=torch.bfloat16), layout=ttnn.TILE_LAYOUT, device=device) + >>> seed = 124 + >>> prob = 0.2 + >>> output = {1}(tensor, seed=seed, probability=prob, scale= 1.0/(1.0 - prob)) )doc", ttnn::dropout.base_name(), ttnn::dropout.python_fully_qualified_name()); @@ -1869,9 +1800,9 @@ void py_module(py::module& module) { // Unaries with integer parameter detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_left_shift, "shift_bits", "integer within range (0, 31)", "INT32", "Support provided for Wormhole_B0 only."); detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_right_shift, "shift_bits", "integer within range (0, 31)", "INT32", "Support provided for Wormhole_B0 only."); - detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_and, "value", "scalar value","INT32", "Input tensor needs to be positive.Support provided only for Wormhole_B0."); - detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_or, "value", "scalar value", "INT32", "Input tensor needs to be positive.Support provided only for Wormhole_B0."); - detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_xor, "value", "scalar value","INT32", "Input tensor needs to be positive.Support provided only for Wormhole_B0."); + detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_and, "value", "scalar value", "INT32", "Input tensor needs to be positive. 
Support provided only for Wormhole_B0."); + detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_or, "value", "scalar value", "INT32", "Input tensor needs to be positive. Support provided only for Wormhole_B0."); + detail::bind_unary_operation_with_integer_parameter(module, ttnn::bitwise_xor, "value", "scalar value","INT32", "Input tensor needs to be positive. Support provided only for Wormhole_B0."); // Unary ops with dim parameter @@ -1933,7 +1864,7 @@ void py_module(py::module& module) { detail::bind_sigmoid_accurate(module, ttnn::sigmoid_accurate); detail::bind_unary_chain(module, ttnn::unary_chain); detail::bind_identity(module, ttnn::identity); - detail::bind_power(module, ttnn::pow, R"doc(BFLOAT16, BFLOAT8_B)doc"); + detail::bind_power(module, ttnn::pow); // unary composite imported into ttnn detail::bind_unary_composite(module, ttnn::deg2rad, R"doc(Performs deg2rad function on :attr:`input_tensor`.)doc", "", R"doc(BFLOAT16, BFLOAT8_B)doc"); @@ -2013,17 +1944,21 @@ void py_module(py::module& module) { module, ttnn::tril, "diagonal", "diagonal value", 0, - R"doc(Performs tril function on :attr:`input_tensor`, :attr:`diagonal`.)doc"); + R"doc(Performs tril function on :attr:`input_tensor`, :attr:`diagonal`.)doc", + R"doc(BFLOAT16, BFLOAT8_B)doc"); detail::bind_unary_composite_int_with_default( module, ttnn::triu, "diagonal", "diagonal value", 0, - R"doc(Performs triu function on :attr:`input_tensor`, :attr:`diagonal`.)doc"); + R"doc(Performs triu function on :attr:`input_tensor`, :attr:`diagonal`.)doc", + R"doc(BFLOAT16, BFLOAT8_B)doc"); detail::bind_unary_composite_int_with_default( module, ttnn::round, "decimals", "decimals value", 0, - R"doc(Performs round function on :attr:`input_tensor`, not supported for grayskull, :attr:`decimals`.)doc"); + R"doc(Performs round function on :attr:`input_tensor`, :attr:`decimals`.)doc", + R"doc(BFLOAT16, BFLOAT8_B)doc", + R"doc(Not supported on Grayskull.)doc"); detail::bind_unary_composite_int( module, ttnn::polygamma,