From 1140bfd950f3d211306f10b6e40deabd91d55b16 Mon Sep 17 00:00:00 2001 From: Julien Olivain Date: Sun, 11 Feb 2024 23:30:46 +0100 Subject: [PATCH] support/testing: mdadm: improve test robustness on slow runners As expected by Peter in [1], the hardcoded 3 seconds for waiting for the RAID array to rebuild are not enough on slow test host runners. This test already failed at least once for that reason, in [2]. In order to fix those failures, this commit adds extra logic to allow several attempts, before failing. The timeout is currently set at 10 attempts, waiting 3 seconds between each attempt. To help even more, those 3 seconds are also scaled with the timeout_multiplier. Fixes: [2] [1] https://lists.buildroot.org/pipermail/buildroot/2024-February/685034.html [2] https://gitlab.com/buildroot.org/buildroot/-/jobs/6137469690 Signed-off-by: Julien Olivain Signed-off-by: Arnout Vandecappelle --- support/testing/tests/package/test_mdadm.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/support/testing/tests/package/test_mdadm.py b/support/testing/tests/package/test_mdadm.py index 75385309a6..d5abdb0706 100644 --- a/support/testing/tests/package/test_mdadm.py +++ b/support/testing/tests/package/test_mdadm.py @@ -122,13 +122,21 @@ class TestMdadm(infra.basetest.BRTest): # We add back this blank drive to the array. self.assertRunOk(f"mdadm {md_dev} --add {failing_dev}") - # We wait few seconds to let the device rebuild. - time.sleep(3) + # Device rebuild can take a variable amount of time, depending + # on the load of the test controller host. So we will allow + # several attempts, before failing. + for attempt in range(10): + # We wait few seconds to let the device rebuild. + time.sleep(3 * self.timeout_multiplier) - # The array should no longer be marked as degraded.
- out, ret = self.emulator.run(monitor_cmd) - self.assertEqual(ret, 0) - self.assertNotIn("DegradedArray", "\n".join(out)) + # Once rebuilt, the array should no longer be marked as + # degraded. + out, ret = self.emulator.run(monitor_cmd) + self.assertEqual(ret, 0) + if "DegradedArray" not in "\n".join(out): + break + else: + self.fail("Timeout while waiting for the array to rebuild.") # With all those array manipulations, the data file should not # be corrupted. We should be able to recompute the same hash