Initial implementation of the leaderboard evaluator. Much can still be improved, but this brings in the core idea.

RobotSail · RobotSail · commit 1892a79dc40a · 2025-04-16T03:40:42.000Z
Signed-off-by: Oleg Silkin <97077423+RobotSail@users.noreply.github.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -42,6 +42,7 @@ issues = "https://github.com/instructlab/eval/issues"
 "mmlu_branch" = "instructlab.eval.mmlu:MMLUBranchEvaluator"
 "mt_bench" = "instructlab.eval.mt_bench:MTBenchEvaluator"
 "mt_bench_branch" = "instructlab.eval.mt_bench:MTBenchBranchEvaluator"
+"leaderboard_v2" = "instructlab.eval.leaderboard:LeaderboardV2Evaluator"
 
 [tool.setuptools_scm]
 version_file = "src/instructlab/eval/_version.py"
diff --git a/requirements.txt b/requirements.txt
@@ -8,6 +8,8 @@ transformers
 accelerate
 pandas
 pandas-stubs
-lm-eval>=0.4.4
+# The optional-dependency extras used below are defined in lm-eval's pyproject.toml:
+# https://github.com/EleutherAI/lm-evaluation-harness/blob/main/pyproject.toml
+lm-eval[math,ifeval,sentencepiece,vllm]>=0.4.4
 httpx
 ragas>=0.2.11
diff --git a/src/instructlab/eval/leaderboard.py b/src/instructlab/eval/leaderboard.py