@@ -16,7 +16,7 @@ def __init__(self) -> None:
1616 ##############
1717 self .set_tasks_affinity = True # required for ls and cp
1818 self .gkfs_daemon_protocol = (
19- "ofi+sockets " # "ofi+verbs" #"ofi+sockets" or "ofi+verbs"
19+ "ofi+verbs " # "ofi+verbs" #"ofi+sockets" or "ofi+verbs"
2020 )
2121 self .cargo_mode = "parallel" # "parallel" or "posix"
2222 self .debug = True
@@ -233,6 +233,9 @@ def to_dict(self): # -> dict[str, Any]:
233233 "app call" : self .app_call ,
234234 "id" : self .job_id ,
235235 "mode" : self .log_suffix ,
236+ "node local" : self .node_local ,
237+ "gkfs_mntdir" : self .gkfs_mntdir if not self .exclude_daemon else "" ,
238+ "gkfs_rootdir" : self .gkfs_rootdir if not self .exclude_daemon else "" ,
236239 }
237240
238241 #!##########################
@@ -318,7 +321,8 @@ def set_variables(self) -> None:
318321 else :
319322 # self.run_dir = self.gkfs_mntdir #? don't enable this flag, as the executing node doesn't have this folder
320323 self .app_flags = (
321- f"workload=unet3d_my_a100_gekko "
324+ # f"workload=unet3d_my_a100_gekko "
325+ f"workload=unet3d_my_a100 " # no need to select a gekko-specific workload; the folder assignment makes it use gekko
322326 # f"workload=bert_v100 "
323327 f"++workload.workflow.generate_data=True ++workload.workflow.train=True ++workload.workflow.checkpoint=True " #++workload.workflow.evaluation=True "
324328 f"++workload.dataset.data_folder={ self .gkfs_mntdir } /data/jit ++workload.checkpoint.checkpoint_folder={ self .gkfs_mntdir } /checkpoints/jit "
0 commit comments