File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -107,13 +107,24 @@ void Tokenizer::pad_tokens(std::vector<int>& tokens,
107107
108108 if (final_length > out_tokens.size ()) {
109109 const size_t pad_count = final_length - out_tokens.size ();
110- out_tokens.insert (out_tokens.end (), pad_count, PAD_TOKEN_ID);
111-
112- if (use_weights) {
113- out_weights.insert (out_weights.end (), pad_count, 1 .0f );
114- }
115- if (use_mask) {
116- out_mask.insert (out_mask.end (), pad_count, 0 .0f );
110+ if (pad_left) {
111+ out_tokens.insert (out_tokens.begin (), pad_count, PAD_TOKEN_ID);
112+
113+ if (use_weights) {
114+ out_weights.insert (out_weights.begin (), pad_count, 1 .0f );
115+ }
116+ if (use_mask) {
117+ out_mask.insert (out_mask.begin (), pad_count, 0 .0f );
118+ }
119+ } else {
120+ out_tokens.insert (out_tokens.end (), pad_count, PAD_TOKEN_ID);
121+
122+ if (use_weights) {
123+ out_weights.insert (out_weights.end (), pad_count, 1 .0f );
124+ }
125+ if (use_mask) {
126+ out_mask.insert (out_mask.end (), pad_count, 0 .0f );
127+ }
117128 }
118129 }
119130 };
Original file line number Diff line number Diff line change @@ -14,6 +14,7 @@ class Tokenizer {
1414 std::vector<std::string> special_tokens;
1515 bool add_bos_token = false ;
1616 bool add_eos_token = false ;
17+ bool pad_left = false ;
1718 std::string end_of_word_suffix;
1819
1920 virtual std::string decode_token (int token_id) const = 0;
You can’t perform that action at this time.
0 commit comments