#include "llama-chat.h"

#include "llama.h"

#include <map>
#include <sstream>
#include <algorithm>
#include <cctype>

#if __cplusplus >= 202000L
    #define LU8(x) (const char*)(u8##x)
#else
    #define LU8(x) u8##x
#endif
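// NOTE: in C++20 the u8"" prefix yields const char8_t[], so the cast above keeps
// these UTF-8 literals usable as plain const char * throughout this file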

// trim whitespace from the beginning and end of a string
static std::string trim(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
        start += 1;
    }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
        end -= 1;
    }
    return str.substr(start, end - start);
}

static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGLM_3         },
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGLM_4         },
    { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE           },
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
    { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
    { "yandex",            LLM_CHAT_TEMPLATE_YANDEX            },
    { "bailing",           LLM_CHAT_TEMPLATE_BAILING           },
    { "bailing-think",     LLM_CHAT_TEMPLATE_BAILING_THINK     },
    { "bailing2",          LLM_CHAT_TEMPLATE_BAILING2          },
    { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
    { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
    { "gpt-oss",           LLM_CHAT_TEMPLATE_OPENAI_MOE        },
    { "hunyuan-dense",     LLM_CHAT_TEMPLATE_HUNYUAN_DENSE     },
    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
};

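// resolve a template by its registered name; std::map::at() throws std::out_of_range
// for unknown names, which llm_chat_detect_template() below relies on to fall back
// to substring-based detection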
llm_chat_template llm_chat_template_from_str(const std::string & name) {
    return LLM_CHAT_TEMPLATES.at(name);
}

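// heuristic template detection: first try an exact match against the names registered
// above, then search the (Jinja) template source for distinctive substrings - for
// example, a template containing "<|im_start|>" is classified as ChatML unless it
// also contains "<|im_sep|>" (Phi-4) or "<end_of_utterance>" (SmolVLM)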
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // ignore
    }

    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    }
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}

// Simple version of "llama_apply_chat_template" that only works with strings.
// Template detection (see llm_chat_detect_template above) relies on heuristic substring checks; this is not a Jinja parser.
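// On success the formatted prompt is written to `dest` and its size is returned;
// -1 is returned when `tmpl` is not one of the templates handled below.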
int32_t llm_chat_apply_template(
    llm_chat_template tmpl,
    const std::vector<const llama_chat_message *> & chat,
    std::string & dest, bool add_ass) {
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
    std::stringstream ss;
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
        // Official mistral 'v7' template
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
        for (auto message : chat) {
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
            } else if (role == "user") {
                ss << "[INST]" << trailing_space << content << "[/INST]";
            } else {
                ss << trailing_space << content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
        bool is_inside_turn = false;
        for (auto message : chat) {
            if (!is_inside_turn) {
                ss << leading_space << "[INST]" << trailing_space;
                is_inside_turn = true;
            }
            std::string role(message->role);
            std::string content(message->content);
            if (role == "system") {
                ss << content << "\n\n";
            } else if (role == "user") {
                ss << content << leading_space << "[/INST]";
            } else {
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
        // llama2 template and its variants
        // [variant] support system message
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
        // [variant] add BOS inside history
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
        // [variant] trim spaces from the input message
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
        // construct the prompt
        bool is_inside_turn = true; // skip BOS at the beginning
        ss << "[INST] ";
        for (auto message : chat) {
            std::string content = strip_message ? trim(message->content) : message->content;
            std::string role(message->role);
            if (!is_inside_turn) {
                is_inside_turn = true;
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
            }
            if (role == "system") {
                if (support_system_message) {
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
                } else {
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
                    ss << content << "\n";
                }
            } else if (role == "user") {
                ss << content << " [/INST]";
            } else {
                ss << content << "</s>";
                is_inside_turn = false;
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
        // Phi 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
        // chatml template
        for (auto message : chat) {
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_start|>assistant<|im_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
        // Falcon 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>\n" << message->content << "\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
        // zephyr template
        for (auto message : chat) {
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
        for (auto message : chat) {
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
            ss << bos << message->role << "\n" << message->content << "</s>\n";
        }
        if (add_ass) {
            ss << "<s>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
        // google/gemma-7b-it
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
                system_prompt += trim(message->content);
                continue;
            }
            // in gemma, "assistant" is "model"
            role = role == "assistant" ? "model" : message->role;
            ss << "<start_of_turn>" << role << "\n";
            if (!system_prompt.empty() && role != "model") {
                ss << system_prompt << "\n\n";
                system_prompt = "";
            }
            ss << trim(message->content) << "<end_of_turn>\n";
        }
        if (add_ass) {
            ss << "<start_of_turn>model\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
        // OrionStarAI/Orion-14B-Chat
        std::string system_prompt = "";
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // there is no system message support, we will merge it with user prompt
                system_prompt += message->content;
                continue;
            } else if (role == "user") {
                ss << "Human: ";
                if (!system_prompt.empty()) {
                    ss << system_prompt << "\n\n";
                    system_prompt = "";
                }
                ss << message->content << "\n\nAssistant: </s>";
            } else {
                ss << message->content << "</s>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
        // openchat/openchat-3.5-0106
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "<|end_of_turn|>";
            } else {
                role[0] = toupper(role[0]);
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
            }
        }
        if (add_ass) {
            ss << "GPT4 Correct Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                // Orca-Vicuna variant uses a system prefix
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
                    ss << "SYSTEM: " << message->content << "\n";
                } else {
                    ss << message->content << "\n\n";
                }
            } else if (role == "user") {
                ss << "USER: " << message->content << "\n";
            } else if (role == "assistant") {
                ss << "ASSISTANT: " << message->content << "</s>\n";
            }
        }
        if (add_ass) {
            ss << "ASSISTANT:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
        // deepseek-ai/deepseek-coder-33b-instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content;
            } else if (role == "user") {
                ss << "### Instruction:\n" << message->content << "\n";
            } else if (role == "assistant") {
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
            }
        }
        if (add_ass) {
            ss << "### Response:\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
        // CohereForAI/c4ai-command-r-plus
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "user") {
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            } else if (role == "assistant") {
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
            }
        }
        if (add_ass) {
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
        // Llama 3
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
        }
        if (add_ass) {
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
        // chatglm3-6b
        ss << "[gMASK]" << "sop";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n " << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
        ss << "[gMASK]" << "<sop>";
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|" << role << "|>" << "\n" << message->content;
        }
        if (add_ass) {
            ss << "<|assistant|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "user") {
                ss << LU8("<用户>");
                ss << trim(message->content);
                ss << "<AI>";
            } else {
                ss << trim(message->content);
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
        // DeepSeek-V2
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
        // DeepSeek-V3
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << LU8("<|User|>") << message->content;
            } else if (role == "assistant") {
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
            }
        }
        if (add_ass) {
            ss << LU8("<|Assistant|>");
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "user") {
                ss << "[|user|]" << trim(message->content) << "\n";
            } else if (role == "assistant") {
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
            } else if (role == "tool") {
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
            }
        }
        if (add_ass) {
            ss << "[|assistant|]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
        // this template requires the model to have "\n\n" as EOT token
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "system") {
                ss << "System: " << trim(chat[i]->content) << "\n\n";
            } else if (role == "user") {
                ss << "User: " << trim(chat[i]->content) << "\n\n";
                if (i == chat.size() - 1) {
                    ss << "Assistant:";
                }
            } else if (role == "assistant") {
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
            if (role == "assistant_tool_call") {
                ss << "<|tool_call|>";
            }
            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
        // GigaChat template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        // Handle system message if present
        if (has_system) {
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
        } else {
            ss << "<s>";
        }

        // Process remaining messages
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
                   << "available functions<|role_sep|>[]<|message_sep|>";
            } else if (role == "assistant") {
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << "assistant<|role_sep|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
        // Megrez template
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
        }

        if (add_ass) {
            ss << "<|role_start|>assistant<|role_end|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
        // Yandex template ("\n\n" is defined as EOT token)

        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (role == "user") {
                ss << " Пользователь: " << chat[i]->content << "\n\n";
            } else if (role == "assistant") {
                ss << " Ассистент: " << chat[i]->content << "\n\n";
            }
        }

        // Add generation prompt if needed
        if (add_ass) {
            ss << " Ассистент:[SEP]";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
        // Bailing (Ling/Ring) template
        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }

            ss << "<role>" << role << "</role>" << message->content;
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";

            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
                ss << "<think>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
        // Bailing2 (Ling 2.0) template
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";

        if (!has_system) {
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
        }

        for (auto message : chat) {
            std::string role(message->role);

            if (role == "user") {
                role = "HUMAN";
            } else {
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
            }

            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
        }

        if (add_ass) {
            ss << "<role>ASSISTANT</role>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
        // Llama 4
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
        }
        if (add_ass) {
            ss << "<|header_start|>assistant<|header_end|>\n\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
        // SmolVLM
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << message->content << "\n\n";
            } else if (role == "user") {
                ss << "User: " << message->content << "<end_of_utterance>\n";
            } else {
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
        // dots.llm1.inst (DOTS1)
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|system|>" << message->content << "<|endofsystem|>";
            } else if (role == "user") {
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
            } else {
                ss << "<|response|>" << message->content << "<|endofresponse|>";
            }
        }
        if (add_ass) {
            ss << "<|response|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
        // tencent/Hunyuan-A13B-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
            } else if (role == "assistant") {
                ss << message->content << "<|eos|>";
            } else {
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
        // OpenAI MoE (based on Harmony chat template)
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<|start|>" << role << "<|message|>" << message->content;
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
        }
        if (add_ass) {
            ss << "<|start|>assistant";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
        // tencent/Hunyuan-4B-Instruct
        for (size_t i = 0; i < chat.size(); i++) {
            std::string role(chat[i]->role);
            if (i == 0) {
                if (role == "system") {
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
                }
            }

            if (role == "assistant") {
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
            } else if (role == "user") {
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
            }
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "<|im_system|>system<|im_middle|>";
            } else if (role == "user") {
                ss << "<|im_user|>user<|im_middle|>";
            } else if (role == "assistant") {
                ss << "<|im_assistant|>assistant<|im_middle|>";
            } else if (role == "tool") {
                ss << "<|im_system|>tool<|im_middle|>";
            }

            ss << message->content << "<|im_end|>";
        }
        if (add_ass) {
            ss << "<|im_assistant|>assistant<|im_middle|>";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
        for (auto message : chat) {
            std::string role(message->role);
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
        }
        if (add_ass) {
            ss << "<seed:bos>assistant\n";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
        for (auto message : chat) {
            std::string role(message->role);
            if (role == "system") {
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "user") {
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
            } else if (role == "assistant") {
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
            }
        }
        if (add_ass) {
            ss << "Assistant:";
        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
        // [unused9]系统:xxx[unused10]
        // [unused9]用户:xxx[unused10]
        // [unused9]助手:xxx[unused10]
        // ...
        for (size_t i = 0; i < chat.size(); ++i) {
            const auto & msg = chat[i];
            const std::string & role = msg->role;
            const std::string & content = msg->content;

            if (i == 0 && role != "system") {
                ss << "[unused9]系统:[unused10]";
            }

            if (role == "system") {
                ss << "[unused9]系统:" << content << "[unused10]";
            } else if (role == "user") {
                ss << "[unused9]用户:" << content << "[unused10]";
            } else if (role == "assistant") {
                ss << "[unused9]助手:" << content << "[unused10]";
            } else if (role == "tool") {
                ss << "[unused9]工具:" << content << "[unused10]";
            } else if (role == "function") {
                ss << "[unused9]方法:" << content << "[unused10]";
            }
        }
        if (add_ass) {
            ss << "[unused9]助手:";
        }
    } else {
        // template not supported
        return -1;
    }
    dest = ss.str();
    return dest.size();
}

// public interface

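// copies up to `len` template-name pointers (owned by the static map above) into `output`
// and always returns the total number of built-in templates; a typical caller (illustrative)
// first calls with len == 0 to obtain the count, then calls again with a large enough array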
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    auto it = LLM_CHAT_TEMPLATES.begin();
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
        output[i] = it->first.c_str();
        std::advance(it, 1);
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}