Skip to content

Commit f04c2c3

Browse files
committed
Add integration test for report waiting on retry workers
1 parent f8c3847 commit f04c2c3

1 file changed

Lines changed: 68 additions & 0 deletions

File tree

ruby/test/integration/minitest_redis_test.rb

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,74 @@ def test_rebuild_different_worker_with_no_failures_exits_cleanly
977977
assert_match(/All tests were ran already/, out)
978978
end
979979

980+
def test_report_waits_for_retry_worker_to_clear_failures
  # Simulates the race condition seen in build 900737:
  # - Report step starts (BUILDKITE_RETRY_COUNT=1), sees queue exhausted immediately,
  #   but error-reports still has a failure from the original run.
  # - A retry worker is concurrently running the failed test.
  # - Without the fix, report exits immediately and cancels the retry worker.
  # - With the fix, report waits up to inactive_workers_timeout for
  #   retry workers to clear error-reports before reporting.

  # First run: worker 1 fails a test
  out, err = capture_subprocess_io do
    system(
      @exe, 'run',
      '--queue', @redis_url,
      '--seed', 'foobar',
      '--build', '1',
      '--worker', '1',
      '--timeout', '1',
      '-Itest',
      'test/flaky_test.rb',
      chdir: 'test/fixtures/',
    )
  end
  assert_empty filter_deprecation_warnings(err)
  assert_match(/1 failures/, normalize(out))

  # Start the report concurrently — it should block waiting for retry workers
  report_out = nil
  report_err = nil
  report_thread = Thread.new do
    report_out, report_err = capture_subprocess_io do
      system(
        { 'BUILDKITE_RETRY_COUNT' => '1', 'BUILDKITE_RETRY_TYPE' => 'manual' },
        @exe, 'report',
        '--queue', @redis_url,
        '--build', '1',
        '--timeout', '1',
        '--inactive-workers-timeout', '10',
        chdir: 'test/fixtures/',
      )
    end
  end

  # Give the report a moment to start, then run the retry worker which
  # re-runs the failed test and clears error-reports
  sleep 0.3
  out, err = capture_subprocess_io do
    system(
      { 'BUILDKITE_RETRY_COUNT' => '1', 'BUILDKITE_RETRY_TYPE' => 'manual', 'FLAKY_TEST_PASS' => '1' },
      @exe, 'run',
      '--queue', @redis_url,
      '--seed', 'foobar',
      '--build', '1',
      '--worker', '2',
      '--timeout', '1',
      '-Itest',
      'test/flaky_test.rb',
      chdir: 'test/fixtures/',
    )
  end
  assert_empty filter_deprecation_warnings(err)
  assert_match(/Retrying failed tests/, out)

  # Thread#join(limit) returns nil when the limit elapses. Fail explicitly in
  # that case instead of falling through to assertions against empty output,
  # which would hide the fact that the report never finished.
  assert report_thread.join(15), 'report did not finish within 15 seconds'
  assert_empty filter_deprecation_warnings(report_err || '')
  assert_match(/0 failures/, normalize(report_out || ''))
ensure
  # Do not leave the report thread running into later tests if we bailed out
  # early (timeout or a failed assertion); killing a finished thread is a no-op.
  report_thread&.kill
end
1047+
9801048
def test_retry_fails_when_test_run_is_expired
9811049
out, err = capture_subprocess_io do
9821050
system(

0 commit comments

Comments
 (0)