#!/bin/bash

# Slurm only honors directives written exactly as "#SBATCH" — with a space
# after '#' they are treated as plain comments and silently ignored.
#SBATCH --nodes=1 # give it a full node
#SBATCH --ntasks-per-node=1
#SBATCH --array=<fill with range of items in config, e.g 0-7 >
#SBATCH --partition=v100
#SBATCH --gpus-per-node=8
#SBATCH --exclusive # this will not allow other jobs to run on this cluster
#SBATCH --output=experiments/tests/jit_debug_deepspeech_old_stephint_nadamw/job_%A_%a.out
#SBATCH --error=experiments/tests/jit_debug_deepspeech_old_stephint_nadamw/job_%A_%a.err

# Usage: sbatch <this file>.sh
# This script reads config.json and launches a sbatch job using task
# arrays where each job in the array corresponds to a training run
# for a workload given a random seed and tuning trial index.
# To generate the config.json use make_job_config.py.
# NOTE: fill in the --array range placeholder above before submitting.

# Trace every command so the per-task .out/.err logs show what ran.
set -x
19+
# Pull docker image (ATTENTION: you may want to modify this)
REPO=""   # e.g. us-central1-docker.pkg.dev
IMAGE=""  # full image path to pull
# BUG FIX: the original ran 'y | gcloud ...' — there is no 'y' command;
# 'yes' is the tool that streams confirmations into the prompt.
yes | gcloud auth configure-docker "$REPO"
docker pull "$IMAGE"

# Job config (ATTENTION: you may want to modify this)
config_file=""   # Replace with your config file path
LOGS_BUCKET=""   # Replace with your bucket used for logging
28+
29+
#######################################
# Read a JSON config file and extract one value by task index and key.
# Arguments:
#   $1 - path to the JSON config file
#   $2 - top-level index (the task id, used as an object key)
#   $3 - key to read inside that entry
# Outputs:
#   The value on stdout ("null" if the key is absent, per jq -r semantics).
#######################################
read_json_value () {
  local json_file="$1"
  local index="$2"
  local key="$3"
  # Declare and assign separately so jq's exit status is not masked by
  # 'local'. Note: the original had spaces inside every quoted string
  # (e.g. ".[\" $index \" ]"), which corrupted the jq path expression.
  local value
  value=$(jq -r ".[\"${index}\"].${key}" "$json_file")
  echo "$value"
}
38+
# Check if jq is installed.
# BUG FIX: the original used 'command -v jq & > /dev/null' — the stray
# space makes '&' background the command and '> /dev/null' a separate
# (empty) redirection, so the 'if !' test never saw jq's real status.
if ! command -v jq > /dev/null 2>&1
then
  echo "jq could not be found. Please install it." >&2
  exit 1
fi
45+
# Each array task reads its own entry from the config file.
TASK="$SLURM_ARRAY_TASK_ID"
FRAMEWORK=$(read_json_value "$config_file" "$TASK" "framework")
DATASET=$(read_json_value "$config_file" "$TASK" "dataset")
SUBMISSION_PATH=$(read_json_value "$config_file" "$TASK" "submission_path")
TUNING_SEARCH_SPACE=$(read_json_value "$config_file" "$TASK" "tuning_search_space")
EXPERIMENT_DIR=$(read_json_value "$config_file" "$TASK" "experiment_dir")
MAX_STEPS=$(read_json_value "$config_file" "$TASK" "max_steps")
RNG_SEED=$(read_json_value "$config_file" "$TASK" "rng_seed")
WORKLOAD=$(read_json_value "$config_file" "$TASK" "workload")
HPARAM_START_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_start_index")
HPARAM_END_INDEX=$(read_json_value "$config_file" "$TASK" "hparam_end_index")
NUM_TUNING_TRIALS=$(read_json_value "$config_file" "$TASK" "num_tuning_trials")
TUNING_RULESET=$(read_json_value "$config_file" "$TASK" "tuning_ruleset")
# BUG FIX: the original passed the (unset) $MAX_GLOBAL_STEPS variable as
# the lookup index; the index must be the task id like every other read.
# (A duplicate FRAMEWORK read was also removed.)
MAX_GLOBAL_STEPS=$(read_json_value "$config_file" "$TASK" "max_global_steps")
61+
# Launch the training run inside the container. All expansions are quoted
# so paths/values containing spaces survive word splitting.
# BUG FIX: the original wrote '-v $HOME /submissions_algorithms/...' — the
# space after $HOME split the -v argument into two words, breaking the
# bind mount (and passing a stray positional arg to docker).
docker run \
  -v /opt/data/:/data/ \
  -v "$HOME"/submissions_algorithms/:/algorithmic-efficiency/submissions_algorithms \
  --gpus all \
  --ipc=host \
  "$IMAGE" \
  -d "$DATASET" \
  -f "$FRAMEWORK" \
  -s "$SUBMISSION_PATH" \
  -w "$WORKLOAD" \
  -t "$TUNING_SEARCH_SPACE" \
  -e "$EXPERIMENT_DIR" \
  -c False \
  -o True \
  --rng_seed "$RNG_SEED" \
  --hparam_start_index "$HPARAM_START_INDEX" \
  --hparam_end_index "$HPARAM_END_INDEX" \
  --num_tuning_trials "$NUM_TUNING_TRIALS" \
  --tuning_ruleset "$TUNING_RULESET" \
  --logs_bucket "$LOGS_BUCKET" \
  -i true \
  -r false