Skip to content

Commit d2ecd2d

Browse files
pwilkin and CISC authored
common/parser: add --skip-chat-parsing to force a pure content parser. (ggml-org#20289)
* Add `--force-pure-content` to force a pure content parser.
* Update common/arg.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Change parameter name [no ci]
---------
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
1 parent 054d8b0 commit d2ecd2d

9 files changed

Lines changed: 33 additions & 0 deletions

File tree

common/arg.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3115,6 +3115,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
31153115
params.chat_template = read_file(value);
31163116
}
31173117
).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE"));
3118+
add_opt(common_arg(
3119+
{"--skip-chat-parsing"},
3120+
{"--no-skip-chat-parsing"},
3121+
string_format(
3122+
"force a pure content parser, even if a Jinja template is specified; model will output everything "
3123+
"in the content section, including any reasoning and/or tool calls (default: disabled)"
3124+
),
3125+
[](common_params & params, bool value) {
3126+
params.force_pure_content_parser = value;
3127+
}
3128+
).set_examples({LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SKIP_CHAT_PARSING"));
31183129
add_opt(common_arg(
31193130
{"--prefill-assistant"},
31203131
{"--no-prefill-assistant"},

common/chat.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1562,6 +1562,21 @@ static common_chat_params common_chat_templates_apply_jinja(const struct common_
15621562
}
15631563
}
15641564

1565+
if (inputs.force_pure_content) {
1566+
LOG_WRN("Forcing pure content template, will not render reasoning or tools separately.");
1567+
// Create the result structure
1568+
common_chat_params data;
1569+
auto params_copy = params;
1570+
params_copy.reasoning_format = COMMON_REASONING_FORMAT_NONE;
1571+
data.prompt = common_chat_template_direct_apply(tmpl, params_copy);
1572+
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
1573+
auto parser = build_chat_peg_parser([](common_chat_peg_builder &p) {
1574+
return p.content(p.rest());
1575+
});
1576+
data.parser = parser.save();
1577+
return data;
1578+
}
1579+
15651580
// Ministral/Mistral Large 3 - uses special reasoning structure fixes, can't use autoparser
15661581
// Note: Mistral Small 3.2 uses [CALL_ID] which Ministral doesn't have, so we can distinguish them
15671582
if (src.find("[SYSTEM_PROMPT]") != std::string::npos && src.find("[TOOL_CALLS]") != std::string::npos &&

common/chat.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ struct common_chat_templates_inputs {
204204
std::map<std::string, std::string> chat_template_kwargs;
205205
bool add_bos = false;
206206
bool add_eos = false;
207+
bool force_pure_content = false;
207208
};
208209

209210
struct common_chat_params {

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -544,6 +544,7 @@ struct common_params {
544544
std::string chat_template = ""; // NOLINT
545545
bool use_jinja = true; // NOLINT
546546
bool enable_chat_template = true;
547+
bool force_pure_content_parser = false;
547548
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
548549
int enable_reasoning = -1; // -1 = auto, 0 = disable, 1 = enable
549550
int reasoning_budget = -1;

tools/cli/cli.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ struct cli_context {
215215
inputs.parallel_tool_calls = false;
216216
inputs.add_generation_prompt = true;
217217
inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
218+
inputs.force_pure_content = chat_params.force_pure_content;
218219
inputs.enable_thinking = chat_params.enable_thinking ? common_chat_templates_support_enable_thinking(chat_params.tmpls.get()) : false;
219220

220221
// Apply chat template to the list of messages

tools/completion/completion.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ int main(int argc, char ** argv) {
308308
inputs.use_jinja = g_params->use_jinja;
309309
inputs.messages = chat_msgs;
310310
inputs.add_generation_prompt = !params.prompt.empty();
311+
inputs.force_pure_content = params.force_pure_content_parser;
311312

312313
prompt = common_chat_templates_apply(chat_templates.get(), inputs).prompt;
313314
}

tools/server/server-common.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1065,6 +1065,7 @@ json oaicompat_chat_params_parse(
10651065

10661066
inputs.add_generation_prompt = true;
10671067
}
1068+
inputs.force_pure_content = opt.force_pure_content;
10681069

10691070
// Apply chat template to the list of messages
10701071
auto chat_params = common_chat_templates_apply(opt.tmpls.get(), inputs);

tools/server/server-common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ struct server_chat_params {
290290
int reasoning_budget = -1;
291291
std::string reasoning_budget_message;
292292
std::string media_path;
293+
bool force_pure_content = false;
293294
};
294295

295296
// used by /completions endpoint

tools/server/server-context.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,7 @@ struct server_context_impl {
911911
/* reasoning_budget */ params_base.reasoning_budget,
912912
/* reasoning_budget_msg */ params_base.reasoning_budget_message,
913913
/* media_path */ params_base.media_path,
914+
/* force_pure_content */ params_base.force_pure_content_parser
914915
};
915916
}
916917

0 commit comments

Comments
 (0)