11import copy
22import dataclasses
33import datetime
4+ import decimal
45import subprocess
56import time
7+ from pathlib import Path
8+ from unittest.mock import ANY
69
710import pytest
811from test_report import sample_compile_result, sample_run_result, sample_system_info
1619
1720@pytest.fixture(scope="module")
1821def docker_compose():
22+ tgt_path = Path.cwd()
23+ if tgt_path.name == "unit-tests":
24+ tgt_path = tgt_path.parent
25+
1926 """Start a test database and run migrations"""
20- subprocess.check_call(["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"])
27+ subprocess.check_call(
28+ ["docker", "compose", "-f", "docker-compose.test.yml", "up", "-d"], cwd=tgt_path
29+ )
2130
2231 try:
2332 # Wait for migrations to finish
@@ -26,6 +35,7 @@ def docker_compose():
2635 ["docker", "compose", "-f", "docker-compose.test.yml", "ps", "-q", "migrate-test"],
2736 capture_output=True,
2837 text=True,
38+ cwd=tgt_path,
2939 )
3040
3141 if not result.stdout.strip(): # Container no longer exists
@@ -37,6 +47,7 @@ def docker_compose():
3747 ["docker", "compose", "-f", "docker-compose.test.yml", "logs", "migrate-test"],
3848 capture_output=True,
3949 text=True,
50+ cwd=tgt_path,
4051 )
4152
4253 if "error" in logs.stdout.lower():
@@ -52,7 +63,9 @@ def docker_compose():
5263 ssl_mode="disable",
5364 )
5465 finally:
55- subprocess.run(["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"])
66+ subprocess.run(
67+ ["docker", "compose", "-f", "docker-compose.test.yml", "down", "-v"], cwd=tgt_path
68+ )
5669
5770
5871def _nuke_contents(db):
@@ -114,7 +127,7 @@ def _create_submission_run(
114127):
115128 """Creates a submission run with suitable default values"""
116129 db.create_submission_run(
117- submission,
130+ submission=submission ,
118131 start=start or datetime.datetime.now(tz=datetime.timezone.utc),
119132 end=end
120133 or (datetime.datetime.now(tz=datetime.timezone.utc) + datetime.timedelta(seconds=10)),
@@ -268,9 +281,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard):
268281 with database as db:
269282 end_time = submit_time + datetime.timedelta(seconds=10)
270283 db.create_submission_run(
271- sub_id,
272- submit_time,
273- end_time,
284+ submission= sub_id,
285+ start= submit_time,
286+ end= end_time,
274287 mode="test",
275288 secret=False,
276289 runner="A100",
@@ -282,9 +295,9 @@ def test_leaderboard_submission_basic(database, submit_leaderboard):
282295 # run ends after the contest deadline; this is valid
283296 end_time_2 = submit_time + datetime.timedelta(days=1, hours=1)
284297 db.create_submission_run(
285- sub_id,
286- submit_time,
287- end_time_2,
298+ submission= sub_id,
299+ start= submit_time,
300+ end= end_time_2,
288301 mode="leaderboard",
289302 secret=True,
290303 runner="H100",
@@ -577,6 +590,107 @@ def test_leaderboard_update(database, task_directory):
577590 }
578591
579592
def test_leaderboard_milestones(database, submit_leaderboard):
    """Create, list and delete milestones on the "submit-leaderboard" board.

    ``database`` and ``submit_leaderboard`` are pytest fixtures defined
    outside this function; ``database`` is used as a context manager that
    yields the DB handle the test talks to.
    """
    with database as db:
        board_id = db.get_leaderboard_id("submit-leaderboard")

        # Nothing has been created yet, so the listing is expected to be empty.
        assert db.get_leaderboard_milestones(board_id) == []

        # created_at is filled in at the DB level, so the expected records
        # below cannot pin a fixed value for it and match it with ANY instead.
        db.create_milestone(board_id, "Milestone", "sample code", "Test milestone")
        db.create_milestone(
            board_id, "Milestone2", "other code", "Second milestone", exclude_gpus=["T4"]
        )

        # The first milestone was created without exclude_gpus; the test
        # expects it to be reported back as [""].
        # NOTE(review): confirm [""] (rather than []) is the intended result.
        expected_first = {
            "code": "sample code",
            "created_at": ANY,
            "description": "Test milestone",
            "exclude_gpus": [""],
            "id": 1,
            "name": "Milestone",
        }
        expected_second = {
            "code": "other code",
            "created_at": ANY,
            "description": "Second milestone",
            "exclude_gpus": ["T4"],
            "id": 2,
            "name": "Milestone2",
        }
        assert db.get_leaderboard_milestones(board_id) == [expected_first, expected_second]

        # Deleting by leaderboard id is expected to leave no milestones behind.
        db.delete_milestones(board_id)
        assert db.get_leaderboard_milestones(board_id) == []
628+
629+
def test_leaderboard_milestone_runs(database, submit_leaderboard):
    """Store one run against a milestone, read it back, then delete it.

    ``database`` and ``submit_leaderboard`` are pytest fixtures defined
    outside this function; ``database`` is used as a context manager that
    yields the DB handle the test talks to.
    """
    with database as db:
        board_id = db.get_leaderboard_id("submit-leaderboard")
        milestone_id = db.create_milestone(
            board_id, "Milestone", "sample code", "Test milestone"
        )

        began = datetime.datetime.now(tz=datetime.timezone.utc)
        finished = began + datetime.timedelta(seconds=10)

        # The run is attached to the milestone (keyword `milestone`).
        db.create_submission_run(
            milestone=milestone_id,
            start=began,
            end=finished,
            mode="leaderboard",
            secret=False,
            runner="A100",
            score=5,
            compilation=None,
            result=sample_run_result(),
            system=sample_system_info(),
        )

        # Expected pieces of the stored row, spelled out as literals so the
        # comparison below pins the exact values read back from the DB.
        expected_meta = {
            "command": "./test",
            "duration": 1.5,
            "exit_code": 0,
            "stderr": "",
            "stdout": "All tests passed",
            "success": True,
        }
        expected_result = {
            "test-count": "3",
            "test.0.message": "Addition works correctly",
            "test.0.spec": "Test addition",
            "test.0.status": "pass",
            "test.1.spec": "Test multiplication",
            "test.1.status": "pass",
            "test.2.error": "Division by zero",
            "test.2.spec": "Test division",
            "test.2.status": "fail",
        }
        expected_system = {
            "cpu": "Intel i9-12900K",
            "gpu": "NVIDIA RTX 4090",
            "platform": "Linux-5.15.0",
            "torch": "2.0.1+cu118",
        }
        expected_run = {
            "compilation": None,
            "start_time": began,
            "end_time": finished,
            "meta": expected_meta,
            "mode": "leaderboard",
            "passed": True,
            "result": expected_result,
            "runner": "A100",
            # The score was passed in as the int 5; the test expects a Decimal back.
            "score": decimal.Decimal("5"),
            "secret": False,
            "system": expected_system,
        }
        assert db.get_runs_generic(milestone_id=milestone_id) == [expected_run]

        # Deletion is keyed by the leaderboard id, while the lookup that
        # follows is keyed by the milestone id.
        db.delete_milestone_runs(board_id)
        assert db.get_runs_generic(milestone_id=milestone_id) == []
692+
693+
580694def test_generate_stats(database, submit_leaderboard):
581695 with database as db:
582696 start = datetime.datetime.now(tz=datetime.timezone.utc)
0 commit comments