From 1140bfd950f3d211306f10b6e40deabd91d55b16 Mon Sep 17 00:00:00 2001
From: Julien Olivain <ju.o@free.fr>
Date: Sun, 11 Feb 2024 23:30:46 +0100
Subject: [PATCH] support/testing: mdadm: improve test robustness on slow
 runners

As anticipated by Peter in [1], the hardcoded 3-second wait for the
RAID array to rebuild is not enough on slow test host runners. This
test has already failed at least once for that reason, in [2].

In order to fix those failures, this commit adds extra logic to allow
several attempts before failing. The limit is currently set at 10
attempts, waiting 3 seconds before each attempt. To help even more,
those 3 seconds are also scaled with the timeout_multiplier.

Fixes: [2]

[1] https://lists.buildroot.org/pipermail/buildroot/2024-February/685034.html
[2] https://gitlab.com/buildroot.org/buildroot/-/jobs/6137469690

Signed-off-by: Julien Olivain <ju.o@free.fr>
Signed-off-by: Arnout Vandecappelle <arnout@mind.be>
---
 support/testing/tests/package/test_mdadm.py | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/support/testing/tests/package/test_mdadm.py b/support/testing/tests/package/test_mdadm.py
index 75385309a6..d5abdb0706 100644
--- a/support/testing/tests/package/test_mdadm.py
+++ b/support/testing/tests/package/test_mdadm.py
@@ -122,13 +122,21 @@ class TestMdadm(infra.basetest.BRTest):
         # We add back this blank drive to the array.
         self.assertRunOk(f"mdadm {md_dev} --add {failing_dev}")
 
-        # We wait few seconds to let the device rebuild.
-        time.sleep(3)
+        # Device rebuild can take a variable amount of time, depending
+        # on the load of the test controller host. So we will allow
+        # several attempts, before failing.
+        for attempt in range(10):
+            # We wait few seconds to let the device rebuild.
+            time.sleep(3 * self.timeout_multiplier)
 
-        # The array should no longer be marked as degraded.
-        out, ret = self.emulator.run(monitor_cmd)
-        self.assertEqual(ret, 0)
-        self.assertNotIn("DegradedArray", "\n".join(out))
+            # Once rebuilt, the array should no longer be marked as
+            # degraded.
+            out, ret = self.emulator.run(monitor_cmd)
+            self.assertEqual(ret, 0)
+            if "DegradedArray" not in "\n".join(out):
+                break
+        else:
+            self.fail("Timeout while waiting for the array to rebuild.")
 
         # With all those array manipulations, the data file should not
         # be corrupted. We should be able to recompute the same hash