@@ -1608,6 +1608,23 @@ def _set_vocab_glmedge(self):
16081608 special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["<|endoftext|>"])
16091609 special_vocab.add_to_gguf(self.gguf_writer)
16101610
1611+ def _set_vocab_glm(self):
1612+ from transformers import AutoTokenizer
1613+ tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
1614+ special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
1615+ tokens, toktypes, tokpre = self.get_vocab_base()
1616+ self.gguf_writer.add_tokenizer_model("gpt2")
1617+ self.gguf_writer.add_tokenizer_pre(tokpre)
1618+ self.gguf_writer.add_token_list(tokens)
1619+ self.gguf_writer.add_token_types(toktypes)
1620+ # Special tokens
1621+ # Note: Using <|endoftext|> (151329) for eot causes endless generation
1622+ special_vocab._set_special_token("bos", tokenizer.get_added_vocab()["[gMASK]"]) # 151331
1623+ special_vocab._set_special_token("eot", tokenizer.get_added_vocab()["<|user|>"]) # 151336
1624+ special_vocab._set_special_token("unk", tokenizer.get_added_vocab()["<|endoftext|>"]) # 151329
1625+ special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"]) # 151338
1626+ special_vocab.add_to_gguf(self.gguf_writer)
1627+
16111628 def _set_vocab_interns1(self):
16121629 tokens: list[str] = []
16131630 toktypes: list[int] = []
@@ -7710,6 +7727,9 @@ def prepare_tensors(self):
77107727class DeepseekV2Model(TextModel):
77117728 model_arch = gguf.MODEL_ARCH.DEEPSEEK2
77127729
7730+ # TODO @ngxson : remove this when we support MTP for deepseek models
7731+ skip_mtp = True
7732+
77137733 def set_vocab(self):
77147734 try:
77157735 self._set_vocab_gpt2()
@@ -7841,10 +7861,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
78417861 name = name.replace("e_score_correction_bias", "e_score_correction.bias")
78427862
78437863 # skip Multi-Token Prediction (MTP) layers
7844- block_count = self.hparams["num_hidden_layers"]
7845- match = re.match(r"model.layers.(\d+)", name)
7846- if match and int(match.group(1)) >= block_count:
7847- return
7864+ if self.skip_mtp:
7865+ block_count = self.hparams["num_hidden_layers"]
7866+ match = re.match(r"model.layers.(\d+)", name)
7867+ if match and int(match.group(1)) >= block_count:
7868+ return
78487869
78497870 # process the experts separately
78507871 if name.find("mlp.experts") != -1:
@@ -8684,24 +8705,7 @@ def __init__(self, *args, **kwargs):
86848705 self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
86858706
def set_vocab(self):
        """Write the tokenizer via the shared GLM vocab/special-token routine."""
        return self._set_vocab_glm()
87058709
87068710 def set_gguf_parameters(self):
87078711 super().set_gguf_parameters()
@@ -8801,26 +8805,38 @@ def prepare_tensors(self):
class Glm4MoeLiteModel(DeepseekV2Model):
    # GLM-4 MoE "lite" checkpoints reuse the DeepseekV2 graph layout.
    model_arch = gguf.MODEL_ARCH.DEEPSEEK2

    def set_vocab(self):
        """Write the tokenizer via the shared GLM vocab/special-token routine."""
        return self._set_vocab_glm()
@ModelBase.register("GlmMoeDsaForCausalLM")
class GlmMoeDsaModel(DeepseekV2Model):
    """GLM MoE converter with a DSA (sparse-attention) indexer and MTP layers."""

    model_arch = gguf.MODEL_ARCH.GLM_DSA
    # Unlike plain DeepseekV2, the NextN/MTP tensors are converted, not skipped.
    skip_mtp = False

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Count NextN/MTP prediction layers as additional blocks so their
        # tensors are included in the name map.
        mtp_layers = self.hparams.get("num_nextn_predict_layers", 0)
        self.block_count = self.hparams["num_hidden_layers"] + mtp_layers
        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)

    def set_vocab(self):
        """Write the tokenizer via the shared GLM vocab/special-token routine."""
        return self._set_vocab_glm()

    def set_gguf_parameters(self):
        super().set_gguf_parameters()

        # Only the rotary slice of the QK head is rotated.
        rope_dim = self.hparams["qk_rope_head_dim"]
        rotary_factor = self.hparams.get("partial_rotary_factor", 1.0)
        self.gguf_writer.add_rope_dimension_count(int(rope_dim * rotary_factor))

        # NextN/MTP prediction layers
        mtp_layers = self.hparams.get("num_nextn_predict_layers")
        if mtp_layers is not None:
            self.gguf_writer.add_nextn_predict_layers(mtp_layers)

        # DSA indexer parameters
        self.gguf_writer.add_indexer_head_count(self.hparams["index_n_heads"])
        self.gguf_writer.add_indexer_key_length(self.hparams["index_head_dim"])
        self.gguf_writer.add_indexer_top_k(self.hparams["index_topk"])
88248840
88258841
88268842@ModelBase.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
0 commit comments