@@ -977,6 +977,74 @@ def test_rebuild_different_worker_with_no_failures_exits_cleanly
977977 assert_match ( /All tests were ran already/ , out )
978978 end
979979
def test_report_waits_for_retry_worker_to_clear_failures
  # Simulates the race condition seen in build 900737:
  # - Report step starts (BUILDKITE_RETRY_COUNT=1), sees queue exhausted immediately,
  #   but error-reports still has a failure from the original run.
  # - A retry worker is concurrently running the failed test.
  # - Without the fix, report exits immediately and cancels the retry worker.
  # - With the fix, report waits up to inactive_workers_timeout for
  #   retry workers to clear error-reports before reporting.

  # Loaded here because this test is the only visible user of Open3; move it
  # to the file's require block if one is preferred.
  require 'open3'

  # Environment shared by the report step and the retry worker.
  retry_env = { 'BUILDKITE_RETRY_COUNT' => '1', 'BUILDKITE_RETRY_TYPE' => 'manual' }

  # First run: worker 1 fails a test
  out, err = capture_subprocess_io do
    system(
      @exe, 'run',
      '--queue', @redis_url,
      '--seed', 'foobar',
      '--build', '1',
      '--worker', '1',
      '--timeout', '1',
      '-Itest',
      'test/flaky_test.rb',
      chdir: 'test/fixtures/',
    )
  end
  assert_empty filter_deprecation_warnings(err)
  assert_match(/1 failures/, normalize(out))

  # Start the report concurrently — it should block waiting for retry workers.
  # Open3.capture3 gives the child its own pipes, so this capture cannot
  # interfere with the capture_subprocess_io call made on the main thread
  # below (that helper reopens the process-wide $stdout/$stderr).
  report_thread = Thread.new do
    Open3.capture3(
      retry_env,
      @exe, 'report',
      '--queue', @redis_url,
      '--build', '1',
      '--timeout', '1',
      '--inactive-workers-timeout', '10',
      chdir: 'test/fixtures/',
    )
  end

  # Give the report a moment to start, then run the retry worker which
  # re-runs the failed test and clears error-reports
  sleep 0.3
  out, err = capture_subprocess_io do
    system(
      retry_env.merge('FLAKY_TEST_PASS' => '1'),
      @exe, 'run',
      '--queue', @redis_url,
      '--seed', 'foobar',
      '--build', '1',
      '--worker', '2',
      '--timeout', '1',
      '-Itest',
      'test/flaky_test.rb',
      chdir: 'test/fixtures/',
    )
  end
  assert_empty filter_deprecation_warnings(err)
  assert_match(/Retrying failed tests/, out)

  # Thread#join returns nil on timeout; fail explicitly instead of asserting
  # against output that was never produced.
  assert report_thread.join(15), 'report step did not finish within 15 seconds'
  report_out, report_err, _status = report_thread.value
  assert_empty filter_deprecation_warnings(report_err)
  assert_match(/0 failures/, normalize(report_out))
end
1047+
9801048 def test_retry_fails_when_test_run_is_expired
9811049 out , err = capture_subprocess_io do
9821050 system (
0 commit comments