From 65d118c342ef8c5c47c2fc6cbc900a871edacf81 Mon Sep 17 00:00:00 2001
From: Luke Wren <wren6991@gmail.com>
Date: Sat, 23 Nov 2024 19:02:03 +0000
Subject: [PATCH] RISC-V __mulsf3: handle exponent increase due to rounding
 (#2086)

---
 src/rp2_common/pico_float/float_single_hazard3.S | 11 ++++++++---
 test/pico_float_test/pico_float_test_hazard3.c   | 12 ++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/rp2_common/pico_float/float_single_hazard3.S b/src/rp2_common/pico_float/float_single_hazard3.S
index 1e57f1f25..ee2f1c72b 100644
--- a/src/rp2_common/pico_float/float_single_hazard3.S
+++ b/src/rp2_common/pico_float/float_single_hazard3.S
@@ -216,13 +216,10 @@ __mulsf3:
     clz a0, a4
     sll a4, a4, a0
     sub a2, a2, a0
-    // After normalising we can calculate the final exponent, since rounding
-    // cannot increase the exponent for multiplication (unlike addition)
     add a2, a2, a3
     // Subtract redundant bias term (127), add 1 for normalisation correction
     addi a2, a2, -126
     blez a2, __mulsf_underflow
-    bge a2, t0, __mulsf_overflow
 
     // Gather sticky bits from low fraction:
     snez a1, a1
@@ -231,6 +228,10 @@ __mulsf3:
     bexti a1, a4, 8
     add a4, a4, a1
     addi a4, a4, 127
+    // Check carry-out: exponent may increase due to rounding
+    bgez a4, 2f
+1:
+    bge a2, t0, __mulsf_overflow
     // Pack it and ship it
     packh a2, a2, a6
     slli a2, a2, 23
@@ -238,6 +239,10 @@ __mulsf3:
     srli a4, a4, 9
     add a0, a4, a2
     ret
+2:
+    srli a4, a4, 1
+    addi a2, a2, 1
+    j 1b
 
 __mulsf_underflow:
     // Signed zero
diff --git a/test/pico_float_test/pico_float_test_hazard3.c b/test/pico_float_test/pico_float_test_hazard3.c
index 61f2a8e19..47da23b8d 100644
--- a/test/pico_float_test/pico_float_test_hazard3.c
+++ b/test/pico_float_test/pico_float_test_hazard3.c
@@ -149,6 +149,18 @@ test_t mul_directed_tests[] = {
     // 1.25 x 2^-63 x 1.25 x 2^-64 = 0
     // (normal inputs with subnormal output, and we claim to be FTZ)
     {0x20200000u, 0x1fa00000u, 0x00000000u},
+    // 1.333333 (rounded down) x 1.5 = 2 - 1 ulp
+    {0x3faaaaaau, 0x3fc00000u, 0x3fffffffu},
+    // 1.333333 (rounded down) x (1.5 + 1 ulp) = slightly more than 2, rounds down to exactly 2
+    {0x3faaaaaau, 0x3fc00001u, 0x40000000u},
+    // (1.333333 (rounded down) + 1 ulp) x 1.5 = slightly more than 2, rounds down to exactly 2
+    {0x3faaaaabu, 0x3fc00000u, 0x40000000u},
+    // (1.25 - 1 ulp) x (0.8 + 1 ulp) = slightly less than 1, rounds up to exactly 1 (exponent increases after rounding)
+    {0x3f9fffffu, 0x3f4cccceu, 0x3f800000u},
+    // as above, but overflow on exponent increase -> +inf
+    {0x3f9fffffu, 0x7f4cccceu, 0x7f800000u},
+    // subtract 1 ulp from rhs -> largest normal
+    {0x3f9fffffu, 0x7f4ccccdu, 0x7f7fffffu},
 };
 
 #define N_RANDOM_TESTS 1000