From a9554736441cb182958afe7e80679e1c5c186d12 Mon Sep 17 00:00:00 2001
From: Luke Wren <wren6991@gmail.com>
Date: Sat, 23 Nov 2024 00:16:51 +0000
Subject: [PATCH] RISC-V __mulsf3: handle exponent increase due to rounding

---
 src/rp2_common/pico_float/float_single_hazard3.S | 9 +++++++--
 test/pico_float_test/pico_float_test_hazard3.c   | 8 ++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/src/rp2_common/pico_float/float_single_hazard3.S b/src/rp2_common/pico_float/float_single_hazard3.S
index 1e57f1f25..ce31fcd94 100644
--- a/src/rp2_common/pico_float/float_single_hazard3.S
+++ b/src/rp2_common/pico_float/float_single_hazard3.S
@@ -216,8 +216,6 @@ __mulsf3:
     clz a0, a4
     sll a4, a4, a0
     sub a2, a2, a0
-    // After normalising we can calculate the final exponent, since rounding
-    // cannot increase the exponent for multiplication (unlike addition)
     add a2, a2, a3
     // Subtract redundant bias term (127), add 1 for normalisation correction
     addi a2, a2, -126
@@ -231,6 +229,9 @@ __mulsf3:
     bexti a1, a4, 8
     add a4, a4, a1
     addi a4, a4, 127
+    // Check carry-out: exponent may increase due to rounding
+    bgez a4, 2f
+1:
     // Pack it and ship it
     packh a2, a2, a6
     slli a2, a2, 23
@@ -238,6 +239,10 @@ __mulsf3:
     srli a4, a4, 9
     add a0, a4, a2
     ret
+2:
+    srli a4, a4, 1
+    addi a2, a2, 1
+    j 1b
 
 __mulsf_underflow:
     // Signed zero
diff --git a/test/pico_float_test/pico_float_test_hazard3.c b/test/pico_float_test/pico_float_test_hazard3.c
index 61f2a8e19..336d41ac0 100644
--- a/test/pico_float_test/pico_float_test_hazard3.c
+++ b/test/pico_float_test/pico_float_test_hazard3.c
@@ -149,6 +149,14 @@ test_t mul_directed_tests[] = {
     // 1.25 x 2^-63 x 1.25 x 2^-64 = 0
     // (normal inputs with subnormal output, and we claim to be FTZ)
     {0x20200000u, 0x1fa00000u, 0x00000000u},
+    // 1.333333 (rounded down) x 1.5 = 2 - 1 ulp
+    {0x3faaaaaau, 0x3fc00000u, 0x3fffffffu},
+    // 1.333333 (rounded down) x (1.5 + 1 ulp): slightly above 2, rounds down to 2
+    {0x3faaaaaau, 0x3fc00001u, 0x40000000u},
+    // (1.333333 (rounded down) + 1 ulp) x 1.5: slightly above 2, rounds down to 2
+    {0x3faaaaabu, 0x3fc00000u, 0x40000000u},
+    // (1.25 - 1 ulp) x (0.8 + 1 ulp): just below 1, rounds up to 1 (exponent increases after rounding)
+    {0x3f9fffffu, 0x3f4cccceu, 0x3f800000u},
 };
 
 #define N_RANDOM_TESTS 1000