@@ -241,7 +241,7 @@ def test_make_short_report_benchmarking_failed(sample_eval_result: EvalResult):
     sample_eval_result.run.success = False
     sample_eval_result.compilation = None
     sample_eval_result.run.exit_code = consts.ExitCode.CUDA_FAIL
-    runs = {"benchmark": sample_eval_result}
+    runs = {"private": sample_eval_result}
 
     result = make_short_report(runs, full=False)
     assert result == ["❌ Running benchmarks failed (cuda api error)"]
@@ -274,27 +274,27 @@ def test_make_short_report_leaderboard_failed(sample_eval_result: EvalResult):
     sample_eval_result.run.success = False
     sample_eval_result.compilation = None
     sample_eval_result.run.exit_code = consts.ExitCode.TEST_SPEC
-    runs = {"leaderboard": sample_eval_result}
+    runs = {"public": sample_eval_result}
 
     result = make_short_report(runs, full=False)
-    assert result == ["❌ Running leaderboard failed (internal error 113)"]
+    assert result == ["❌ Running ranked submission failed (internal error 113)"]
 
     sample_eval_result.run.success = True
     sample_eval_result.run.passed = False
     sample_eval_result.run.exit_code = consts.ExitCode.VALIDATE_FAIL
     result = make_short_report(runs)
     # TODO is this actually possible? Should profiling do **any** correctness testing?
-    assert result == ["❌ Tests missing", "❌ Benchmarks missing", "❌ Leaderboard run failed"]
+    assert result == ["❌ Tests missing", "❌ Benchmarks missing", "❌ Ranked submission failed"]
 
 
 def test_make_short_report_empty():
     result = make_short_report({})
-    assert result == ["❌ Tests missing", "❌ Benchmarks missing", "❌ Leaderboard missing"]
+    assert result == ["❌ Tests missing", "❌ Benchmarks missing", "❌ Ranked submission missing"]
 
 
 def test_make_short_report_full_success():
     runs = {}
-    for run_type in ["test", "benchmark", "profile", "leaderboard"]:
+    for run_type in ["test", "private", "profile", "public"]:
         runs[run_type] = EvalResult(
             start=datetime.datetime.now() - datetime.timedelta(minutes=5),
             end=datetime.datetime.now(),
@@ -318,7 +318,7 @@ def test_make_short_report_full_success():
         "✅ Testing successful",
         "✅ Benchmarking successful",
         "✅ Profiling successful",
-        "✅ Leaderboard run successful",
+        "✅ Ranked submission successful",
     ]
     assert result == expected
 
@@ -331,7 +331,7 @@ def test_make_short_report_missing_components():
         "✅ Compilation successful",
         "✅ Testing successful",
         "❌ Benchmarks missing",
-        "❌ Leaderboard missing",
+        "❌ Ranked submission missing",
     ]
     assert result == expected
 
@@ -532,7 +532,7 @@ def test_generate_report_test_failure(sample_full_result: FullResult):
 def test_generate_report_benchmark_failure(sample_full_result: FullResult):
     from libkernelbot.report import Log, Text
 
-    sample_full_result.runs["benchmark"] = create_eval_result()
+    sample_full_result.runs["private"] = create_eval_result()
     report = generate_report(sample_full_result)
     assert report.data == [
         Text(
@@ -557,8 +557,8 @@ def test_generate_report_benchmark_failure(sample_full_result: FullResult):
         Log(header="Benchmarks", content="❗ Could not find any benchmarks"),
     ]
 
-    sample_full_result.runs["benchmark"].run.passed = False
-    sample_full_result.runs["benchmark"].run.result = {
+    sample_full_result.runs["private"].run.passed = False
+    sample_full_result.runs["private"].run.result = {
         "benchmark-count": "2",
         "benchmark.0.status": "pass",
         "benchmark.0.spec": "Basic functionality",
607607def test_generate_report_leaderboard_failure (sample_full_result : FullResult ):
     from libkernelbot.report import Log, Text
 
-    sample_full_result.runs["leaderboard"] = create_eval_result()
+    sample_full_result.runs["public"] = create_eval_result()
     report = generate_report(sample_full_result)
     assert report.data == [
         Text(
@@ -632,9 +632,9 @@ def test_generate_report_leaderboard_failure(sample_full_result: FullResult):
         Log(header="Ranked Benchmark", content="❗ Could not find any benchmarks"),
     ]
 
-    sample_full_result.runs["leaderboard"].run.success = False
-    sample_full_result.runs["leaderboard"].run.exit_code = consts.ExitCode.TIMEOUT_EXPIRED
-    sample_full_result.runs["leaderboard"].run.duration = 10.0
+    sample_full_result.runs["public"].run.success = False
+    sample_full_result.runs["public"].run.exit_code = consts.ExitCode.TIMEOUT_EXPIRED
+    sample_full_result.runs["public"].run.duration = 10.0
 
     report = generate_report(sample_full_result)
     assert report.data == [
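
For reference, the short-report contract these assertions pin down can be summarised as the sketch below. It is reconstructed purely from the expected strings in the diff: the helper `_describe_exit`, the `"❌ Testing failed"` branch, and the assumption that `consts.ExitCode.TEST_SPEC` renders as `internal error 113` are inferences, compilation reporting is elided, and the real implementation lives in `libkernelbot.report`.

```python
# Minimal sketch of the behaviour the tests above exercise. Helper names and
# the exit-code fallback are assumptions inferred from the assertions, not
# the actual libkernelbot.report code; compilation reporting is elided.

def _describe_exit(exit_code) -> str:
    # CUDA_FAIL renders as "cuda api error"; any other code falls back to
    # "internal error <value>" (TEST_SPEC evidently carries the value 113).
    named = {"CUDA_FAIL": "cuda api error"}
    name = getattr(exit_code, "name", "")
    value = getattr(exit_code, "value", exit_code)
    return named.get(name, f"internal error {value}")


def make_short_report(runs: dict, full: bool = True) -> list[str]:
    report = []
    # Failures are always reported; missing sections only when full=True.
    if "test" in runs:
        report.append("✅ Testing successful" if runs["test"].run.passed else "❌ Testing failed")
    elif full:
        report.append("❌ Tests missing")
    if "private" in runs:  # formerly "benchmark": the hidden benchmark run
        run = runs["private"].run
        if not run.success:
            report.append(f"❌ Running benchmarks failed ({_describe_exit(run.exit_code)})")
        else:
            report.append("✅ Benchmarking successful")
    elif full:
        report.append("❌ Benchmarks missing")
    if "profile" in runs:  # profiling is only mentioned when a run exists
        report.append("✅ Profiling successful")
    if "public" in runs:  # formerly "leaderboard": the ranked submission run
        run = runs["public"].run
        if not run.success:
            report.append(f"❌ Running ranked submission failed ({_describe_exit(run.exit_code)})")
        elif not run.passed:
            report.append("❌ Ranked submission failed")
        else:
            report.append("✅ Ranked submission successful")
    elif full:
        report.append("❌ Ranked submission missing")
    return report
```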