@@ -335,6 +335,20 @@ def train():
335335 config .rope_scaling = {"type" : "linear" , "factor" : scaling_factor }
336336 config .use_cache = False
337337
338+ tokenizer = transformers .AutoTokenizer .from_pretrained (
339+ model_args .model_name_or_path ,
340+ cache_dir = training_args .cache_dir ,
341+ model_max_length = training_args .model_max_length ,
342+ padding_side = "right" ,
343+ use_fast = True ,
344+ )
345+ tokenizer .pad_token = tokenizer .eos_token
346+
347+
348+ # Making sure the tokenizer works before loading the model.
349+ print (tokenizer (["This is a test" , "secondary" ], padding = True ))
350+ print (tokenizer .apply_chat_template ([{"role" : "user" , "content" : "This is a test" }]))
351+
338352 # Load model and tokenizer
339353 model = transformers .AutoModelForCausalLM .from_pretrained (
340354 model_args .model_name_or_path ,
@@ -358,14 +372,6 @@ def train():
358372 # Format output dir
359373 training_args .output_dir = f"{ training_args .output_dir } _medusa_mlp_{ model_args .model_name_or_path .split ('/' )[- 1 ]} _medusa_{ training_args .medusa_num_heads } _lr_{ training_args .learning_rate } _layers_{ training_args .medusa_num_layers } "
360374
361- tokenizer = transformers .AutoTokenizer .from_pretrained (
362- model_args .model_name_or_path ,
363- cache_dir = training_args .cache_dir ,
364- model_max_length = training_args .model_max_length ,
365- padding_side = "right" ,
366- use_fast = True ,
367- )
368- tokenizer .pad_token = tokenizer .unk_token
369375
370376 # Load data
371377 data_module = make_supervised_data_module (tokenizer = tokenizer , data_args = data_args )
@@ -375,6 +381,7 @@ def train():
375381 medusa_num_heads = training_args .medusa_num_heads ,
376382 medusa_num_layers = training_args .medusa_num_layers ,
377383 base_model_name_or_path = model_args .model_name_or_path ,
384+ version = "2"
378385 )
379386
380387 # Save Medusa config