
Commit 631f959

add example sbatch script

1 parent f2b4feb commit 631f959

1 file changed: scoring/utils/slurm/run_jobs.sh (83 additions & 0 deletions)
@@ -0,0 +1,83 @@
#!/bin/bash

#SBATCH --nodes=1 # give the job a full node
#SBATCH --ntasks-per-node=1
#SBATCH --array=<fill with range of items in config, e.g. 0-7>
#SBATCH --partition=v100
#SBATCH --gpus-per-node=8
#SBATCH --exclusive # do not allow other jobs to run on this node
#SBATCH --output=experiments/tests/jit_debug_deepspeech_old_stephint_nadamw/job_%A_%a.out
#SBATCH --error=experiments/tests/jit_debug_deepspeech_old_stephint_nadamw/job_%A_%a.err
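# (In the log file names above, %A expands to the array's master job ID and
# %a to the array task index, so each task writes its own .out/.err files.)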

# Usage: sbatch <this file>.sh
# This script reads config.json and launches an sbatch job array in which
# each array task corresponds to a training run for a workload, given a
# random seed and a tuning trial index.
# To generate the config.json use make_job_config.py.
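# For reference, each top-level key in config.json is a task index mapping to
# one run's settings. A hypothetical entry (field names taken from the jq
# lookups below; the values are made up) might look like:
#   { "0": { "framework": "jax", "dataset": "imagenet",
#            "submission_path": "...", "tuning_search_space": "...",
#            "experiment_dir": "...", "max_steps": 100, "rng_seed": 0,
#            "workload": "imagenet_resnet", "hparam_start_index": 0,
#            "hparam_end_index": 1, "num_tuning_trials": 1,
#            "tuning_ruleset": "external", "max_global_steps": 100 } }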

set -x

# Pull docker image (ATTENTION: you may want to modify this)
REPO=""
IMAGE=""
yes | gcloud auth configure-docker $REPO
docker pull $IMAGE

# Job config (ATTENTION: you may want to modify this)
config_file="" # Replace with your config file path
LOGS_BUCKET="" # Replace with your bucket used for logging

# Function to read a JSON file and extract a value by key
read_json_value() {
  local json_file="$1"
  local index="$2"
  local key="$3"
  local value=$(jq -r ".[\"$index\"].$key" "$json_file")
  echo "$value"
}

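# For example (hypothetical values): read_json_value "$config_file" 0 workload
# runs `jq -r '.["0"].workload'` on the config file and prints the value,
# e.g. "imagenet_resnet".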
# Check if jq is installed
if ! command -v jq &> /dev/null
then
  echo "jq could not be found. Please install it."
  exit 1
fi

TASK="$SLURM_ARRAY_TASK_ID"
FRAMEWORK=$(read_json_value "$config_file" "$TASK" "framework")
DATASET=$(read_json_value "$config_file" "$TASK" "dataset")
SUBMISSION_PATH=$(read_json_value "$config_file" "$TASK" "submission_path")
TUNING_SEARCH_SPACE=$(read_json_value "$config_file" "$TASK" "tuning_search_space")
EXPERIMENT_DIR=$(read_json_value "$config_file" "$TASK" "experiment_dir")
MAX_STEPS=$(read_json_value "$config_file" "$TASK" "max_steps")
RNG_SEED=$(read_json_value "$config_file" "$TASK" "rng_seed")
WORKLOAD=$(read_json_value "$config_file" "$TASK" "workload")
HPARAM_START_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_start_index")
HPARAM_END_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_end_index")
NUM_TUNING_TRIALS=$(read_json_value "$config_file" "$TASK" "num_tuning_trials")
TUNING_RULESET=$(read_json_value "$config_file" "$TASK" "tuning_ruleset")
MAX_GLOBAL_STEPS=$(read_json_value "$config_file" "$TASK" "max_global_steps")

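# Note: jq emits the literal string "null" for any key missing from the
# config, so a quick sanity check here (hypothetical, not part of the
# original script) could be:
#   [ "$WORKLOAD" != "null" ] || { echo "bad task index $TASK"; exit 1; }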
docker run \
  -v /opt/data/:/data/ \
  -v $HOME/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms \
  --gpus all \
  --ipc=host \
  $IMAGE \
  -d $DATASET \
  -f $FRAMEWORK \
  -s $SUBMISSION_PATH \
  -w $WORKLOAD \
  -t $TUNING_SEARCH_SPACE \
  -e $EXPERIMENT_DIR \
  -c False \
  -o True \
  --rng_seed $RNG_SEED \
  --hparam_start_index $HPARAM_START_INDEX \
  --hparam_end_index $HPARAM_END_INDEX \
  --num_tuning_trials $NUM_TUNING_TRIALS \
  --tuning_ruleset $TUNING_RULESET \
  --logs_bucket $LOGS_BUCKET \
  -i true \
  -r false
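As a usage sketch (assuming jq is installed and config.json sits in the
working directory), the array range can be derived from the number of entries
in the generated config instead of being filled in by hand, since sbatch
command-line options override the #SBATCH directives in the script:

  # Submit one array task per entry in config.json (indices 0..N-1).
  sbatch --array=0-$(( $(jq length config.json) - 1 )) scoring/utils/slurm/run_jobs.sh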
