From aef4c18bc19068ea153d7953d33db8e78b1d7436 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 17:57:18 +0000 Subject: [PATCH 01/13] Make model type backwards compatible --- examples/python/model-chat.py | 30 ++++++++++++++++++++---------- examples/python/model-qa.py | 28 +++++++++++++++++++--------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 12fb18a44..20246f3a1 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -29,24 +29,34 @@ def main(args): search_options['batch_size'] = 1 if args.verbose: print(search_options) + + # Get model type + model_type = None + if hasattr(model, "type"): + model_type = model.type + else: + import json + + genai_config = json.load(os.path.join(args.model_path, "genai_config.json")) + model_type = genai_config["model"]["type"] if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") else: - if model.type.startswith("phi2") or model.type.startswith("phi3"): + if model_type.startswith("phi2") or model_type.startswith("phi3"): args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - elif model.type.startswith("phi4"): + elif model_type.startswith("phi4"): args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' - elif model.type.startswith("llama3"): + elif model_type.startswith("llama3"): args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - elif model.type.startswith("llama2"): + elif model_type.startswith("llama2"): args.chat_template = '{input}' else: - raise ValueError(f"Chat Template for model type {model.type} is not known. Please provide chat template using --chat_template") + raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") if args.verbose: - print("Model type is:", model.type) + print("Model type is:", model_type) print("Chat Template is:", args.chat_template) params = og.GeneratorParams(model) @@ -55,13 +65,13 @@ def main(args): if args.verbose: print("Generator created") # Set system prompt - if model.type.startswith('phi2') or model.type.startswith('phi3'): + if model_type.startswith('phi2') or model_type.startswith('phi3'): system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" - elif model.type.startswith('phi4'): + elif model_type.startswith('phi4'): system_prompt = f"<|im_start|>system<|im_sep|>\n{args.system_prompt}<|im_end|>" - elif model.type.startswith("llama3"): + elif model_type.startswith("llama3"): system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" - elif model.type.startswith("llama2"): + elif model_type.startswith("llama2"): system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" else: system_prompt = args.system_prompt diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index 5e639ef2b..6613a1af6 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -26,34 +26,44 @@ def main(args): search_options['batch_size'] = 1 if args.verbose: print(search_options) + + # Get model type + model_type = None + if hasattr(model, "type"): + model_type = model.type + else: + import json + + genai_config = json.load(os.path.join(args.model_path, "genai_config.json")) + model_type = genai_config["model"]["type"] if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") else: - if model.type.startswith("phi2") or model.type.startswith("phi3"): + if model_type.startswith("phi2") or model_type.startswith("phi3"): args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - elif model.type.startswith("phi4"): + elif model_type.startswith("phi4"): args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' - elif model.type.startswith("llama3"): + elif model_type.startswith("llama3"): args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - elif model.type.startswith("llama2"): + elif model_type.startswith("llama2"): args.chat_template = '{input}' else: - raise ValueError(f"Chat Template for model type {model.type} is not known. Please provide chat template using --chat_template") + raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") params = og.GeneratorParams(model) params.set_search_options(**search_options) generator = og.Generator(model, params) # Set system prompt - if model.type.startswith('phi2') or model.type.startswith('phi3'): + if model_type.startswith('phi2') or model_type.startswith('phi3'): system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" - elif model.type.startswith('phi4'): + elif model_type.startswith('phi4'): system_prompt = f"<|im_start|>system<|im_sep|>\n{args.system_prompt}<|im_end|>" - elif model.type.startswith("llama3"): + elif model_type.startswith("llama3"): system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" - elif model.type.startswith("llama2"): + elif model_type.startswith("llama2"): system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" else: system_prompt = args.system_prompt From 5e17c6674e114954099842aa05e2ef8c780a3fe3 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 18:05:36 +0000 Subject: [PATCH 02/13] Add missing import --- examples/python/model-chat.py | 2 +- examples/python/model-qa.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 20246f3a1..3767c50ad 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -35,7 +35,7 @@ def main(args): if hasattr(model, "type"): model_type = model.type else: - import json + import json, os genai_config = json.load(os.path.join(args.model_path, "genai_config.json")) model_type = genai_config["model"]["type"] diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index 6613a1af6..8621e1dad 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -32,7 +32,7 @@ def main(args): if hasattr(model, "type"): model_type = model.type else: - import json + import json, os genai_config = json.load(os.path.join(args.model_path, "genai_config.json")) model_type = genai_config["model"]["type"] From bb7591e3b9f748dff2a133aed9040c01ebd14d1e Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 18:10:57 +0000 Subject: [PATCH 03/13] Open file before loading JSON --- examples/python/model-chat.py | 5 +++-- examples/python/model-qa.py | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 3767c50ad..042e45298 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -37,8 +37,9 @@ def main(args): else: import json, os - genai_config = json.load(os.path.join(args.model_path, "genai_config.json")) - model_type = genai_config["model"]["type"] + with open(os.path.join(args.model_path, "genai_config.json"), "r") as f: + genai_config = json.load(f) + model_type = genai_config["model"]["type"] if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index 8621e1dad..99da24f21 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -34,8 +34,9 @@ def main(args): else: import json, os - genai_config = json.load(os.path.join(args.model_path, "genai_config.json")) - model_type = genai_config["model"]["type"] + with open(os.path.join(args.model_path, "genai_config.json"), "r") as f: + genai_config = json.load(f) + model_type = genai_config["model"]["type"] if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: From e8c48b0e29e4167f3157c450e1c58f6f8e970162 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 19:01:45 +0000 Subject: [PATCH 04/13] Add Qwen chat and system templates --- examples/python/model-chat.py | 6 ++++++ examples/python/model-qa.py | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 042e45298..e1f915ddb 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -41,6 +41,7 @@ def main(args): genai_config = json.load(f) model_type = genai_config["model"]["type"] + # Set chat template if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") @@ -53,6 +54,8 @@ def main(args): args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' elif model_type.startswith("llama2"): args.chat_template = '{input}' + elif model_type.startswith("qwen2"): + args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' else: raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") @@ -74,6 +77,9 @@ def main(args): system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" elif model_type.startswith("llama2"): system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" + elif model_type.startswith("qwen2"): + qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." + system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" else: system_prompt = args.system_prompt diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index 99da24f21..15c791026 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -38,6 +38,7 @@ def main(args): genai_config = json.load(f) model_type = genai_config["model"]["type"] + # Set chat template if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") @@ -50,6 +51,8 @@ def main(args): args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' elif model_type.startswith("llama2"): args.chat_template = '{input}' + elif model_type.startswith("qwen2"): + args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' else: raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") @@ -66,6 +69,9 @@ def main(args): system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" elif model_type.startswith("llama2"): system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" + elif model_type.startswith("qwen2"): + qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." + system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" else: system_prompt = args.system_prompt From 6f2acc6ffb89cb32af0fe2e25bfa1c47da0823f1 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 19:12:23 +0000 Subject: [PATCH 05/13] Allow user-provided templates to override defaults --- examples/python/model-chat.py | 31 +++++++++++++++++-------------- examples/python/model-qa.py | 31 +++++++++++++++++-------------- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index e1f915ddb..48d6fb6be 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -42,10 +42,11 @@ def main(args): model_type = genai_config["model"]["type"] # Set chat template + default_chat_template = "" if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") - else: + elif args.chat_template == default_chat_template: if model_type.startswith("phi2") or model_type.startswith("phi3"): args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' elif model_type.startswith("phi4"): @@ -69,19 +70,21 @@ def main(args): if args.verbose: print("Generator created") # Set system prompt - if model_type.startswith('phi2') or model_type.startswith('phi3'): - system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" - elif model_type.startswith('phi4'): - system_prompt = f"<|im_start|>system<|im_sep|>\n{args.system_prompt}<|im_end|>" - elif model_type.startswith("llama3"): - system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" - elif model_type.startswith("llama2"): - system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" - elif model_type.startswith("qwen2"): - qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." - system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" - else: - system_prompt = args.system_prompt + default_system_prompt = "You are a helpful assistant." + if args.system_prompt == default_system_prompt: + if model_type.startswith('phi2') or model_type.startswith('phi3'): + system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" + elif model_type.startswith('phi4'): + system_prompt = f"<|im_start|>system<|im_sep|>\n{args.system_prompt}<|im_end|>" + elif model_type.startswith("llama3"): + system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" + elif model_type.startswith("llama2"): + system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" + elif model_type.startswith("qwen2"): + qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." + system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" + else: + system_prompt = args.system_prompt system_tokens = tokenizer.encode(system_prompt) generator.append_tokens(system_tokens) diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index 15c791026..c0d7dfd66 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -39,10 +39,11 @@ def main(args): model_type = genai_config["model"]["type"] # Set chat template + default_chat_template = "" if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") - else: + elif args.chat_template == default_chat_template: if model_type.startswith("phi2") or model_type.startswith("phi3"): args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' elif model_type.startswith("phi4"): @@ -61,19 +62,21 @@ def main(args): generator = og.Generator(model, params) # Set system prompt - if model_type.startswith('phi2') or model_type.startswith('phi3'): - system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" - elif model_type.startswith('phi4'): - system_prompt = f"<|im_start|>system<|im_sep|>\n{args.system_prompt}<|im_end|>" - elif model_type.startswith("llama3"): - system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" - elif model_type.startswith("llama2"): - system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" - elif model_type.startswith("qwen2"): - qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." - system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" - else: - system_prompt = args.system_prompt + default_system_prompt = "You are a helpful assistant." + if args.system_prompt == default_system_prompt: + if model_type.startswith('phi2') or model_type.startswith('phi3'): + system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" + elif model_type.startswith('phi4'): + system_prompt = f"<|im_start|>system<|im_sep|>\n{args.system_prompt}<|im_end|>" + elif model_type.startswith("llama3"): + system_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{args.system_prompt}<|eot_id|>" + elif model_type.startswith("llama2"): + system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" + elif model_type.startswith("qwen2"): + qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." + system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" + else: + system_prompt = args.system_prompt system_tokens = tokenizer.encode(system_prompt) generator.append_tokens(system_tokens) From 2bfcf687b14ba5f9e410f2f4a8ec83d7e83b974a Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 19:44:57 +0000 Subject: [PATCH 06/13] Make templates more flexible to user input --- examples/python/model-chat.py | 38 +++++++++++++++++++---------------- examples/python/model-qa.py | 38 +++++++++++++++++++---------------- 2 files changed, 42 insertions(+), 34 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 48d6fb6be..2ef6d1188 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -42,23 +42,26 @@ def main(args): model_type = genai_config["model"]["type"] # Set chat template - default_chat_template = "" if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") - elif args.chat_template == default_chat_template: - if model_type.startswith("phi2") or model_type.startswith("phi3"): - args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - elif model_type.startswith("phi4"): - args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' - elif model_type.startswith("llama3"): - args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - elif model_type.startswith("llama2"): - args.chat_template = '{input}' - elif model_type.startswith("qwen2"): - args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' + + if "<|" in args.chat_template and "|>" in args.chat_template: + # User-provided chat template already has tags + pass else: - raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") + if model_type.startswith("phi2") or model_type.startswith("phi3"): + args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' + elif model_type.startswith("phi4"): + args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' + elif model_type.startswith("llama3"): + args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + elif model_type.startswith("llama2"): + args.chat_template = '{input}' + elif model_type.startswith("qwen2"): + args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' + else: + raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") if args.verbose: print("Model type is:", model_type) @@ -70,8 +73,10 @@ def main(args): if args.verbose: print("Generator created") # Set system prompt - default_system_prompt = "You are a helpful assistant." - if args.system_prompt == default_system_prompt: + if "<|" in args.system_prompt and "|>" in args.chat_template: + # User-provided system template already has tags + system_prompt = args.system_prompt + else: if model_type.startswith('phi2') or model_type.startswith('phi3'): system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" elif model_type.startswith('phi4'): @@ -81,8 +86,7 @@ def main(args): elif model_type.startswith("llama2"): system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" elif model_type.startswith("qwen2"): - qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." - system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" + system_prompt = f"<|im_start|>system\n{args.system_prompt}<|im_end|>\n" else: system_prompt = args.system_prompt diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index c0d7dfd66..a449abf86 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -39,31 +39,36 @@ def main(args): model_type = genai_config["model"]["type"] # Set chat template - default_chat_template = "" if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") - elif args.chat_template == default_chat_template: - if model_type.startswith("phi2") or model_type.startswith("phi3"): - args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - elif model_type.startswith("phi4"): - args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' - elif model_type.startswith("llama3"): - args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - elif model_type.startswith("llama2"): - args.chat_template = '{input}' - elif model_type.startswith("qwen2"): - args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' + + if "<|" in args.chat_template and "|>" in args.chat_template: + # User-provided chat template already has tags + pass else: - raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") + if model_type.startswith("phi2") or model_type.startswith("phi3"): + args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' + elif model_type.startswith("phi4"): + args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' + elif model_type.startswith("llama3"): + args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + elif model_type.startswith("llama2"): + args.chat_template = '{input}' + elif model_type.startswith("qwen2"): + args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' + else: + raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") params = og.GeneratorParams(model) params.set_search_options(**search_options) generator = og.Generator(model, params) # Set system prompt - default_system_prompt = "You are a helpful assistant." - if args.system_prompt == default_system_prompt: + if "<|" in args.system_prompt and "|>" in args.chat_template: + # User-provided system template already has tags + system_prompt = args.system_prompt + else: if model_type.startswith('phi2') or model_type.startswith('phi3'): system_prompt = f"<|system|>\n{args.system_prompt}<|end|>" elif model_type.startswith('phi4'): @@ -73,8 +78,7 @@ def main(args): elif model_type.startswith("llama2"): system_prompt = f"[INST] <>\n{args.system_prompt}\n<>" elif model_type.startswith("qwen2"): - qwen_system_prompt = "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." - system_prompt = f"<|im_start|>system\n{qwen_system_prompt}<|im_end|>\n" + system_prompt = f"<|im_start|>system\n{args.system_prompt}<|im_end|>\n" else: system_prompt = args.system_prompt From bedea860c4d0d8363ae816b33eba31d9522c00ba Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 20:06:23 +0000 Subject: [PATCH 07/13] Fix naming bug --- examples/python/model-chat.py | 2 +- examples/python/model-qa.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 2ef6d1188..e3e3980ac 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -59,7 +59,7 @@ def main(args): elif model_type.startswith("llama2"): args.chat_template = '{input}' elif model_type.startswith("qwen2"): - args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' + args.chat_template = '<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n' else: raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index a449abf86..0e6cd63d1 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -56,7 +56,7 @@ def main(args): elif model_type.startswith("llama2"): args.chat_template = '{input}' elif model_type.startswith("qwen2"): - args.chat_template = '<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n' + args.chat_template = '<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n' else: raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") From ca2b705f68d590cad83890bbcc03eb7de32a3ad6 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 20:56:56 +0000 Subject: [PATCH 08/13] Fix case where chat template is not provided --- examples/python/model-chat.py | 28 ++++++++++++---------------- examples/python/model-qa.py | 28 ++++++++++++---------------- 2 files changed, 24 insertions(+), 32 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index e3e3980ac..d60dd25cb 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -45,23 +45,19 @@ def main(args): if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") - - if "<|" in args.chat_template and "|>" in args.chat_template: - # User-provided chat template already has tags - pass + else: + if model_type.startswith("phi2") or model_type.startswith("phi3"): + args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' + elif model_type.startswith("phi4"): + args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' + elif model_type.startswith("llama3"): + args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + elif model_type.startswith("llama2"): + args.chat_template = '{input}' + elif model_type.startswith("qwen2"): + args.chat_template = '<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n' else: - if model_type.startswith("phi2") or model_type.startswith("phi3"): - args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - elif model_type.startswith("phi4"): - args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' - elif model_type.startswith("llama3"): - args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - elif model_type.startswith("llama2"): - args.chat_template = '{input}' - elif model_type.startswith("qwen2"): - args.chat_template = '<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n' - else: - raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") + raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") if args.verbose: print("Model type is:", model_type) diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index 0e6cd63d1..b5ff59380 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -42,23 +42,19 @@ def main(args): if args.chat_template: if args.chat_template.count('{') != 1 or args.chat_template.count('}') != 1: raise ValueError("Chat template must have exactly one pair of curly braces with input word in it, e.g. '<|user|>\n{input} <|end|>\n<|assistant|>'") - - if "<|" in args.chat_template and "|>" in args.chat_template: - # User-provided chat template already has tags - pass + else: + if model_type.startswith("phi2") or model_type.startswith("phi3"): + args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' + elif model_type.startswith("phi4"): + args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' + elif model_type.startswith("llama3"): + args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' + elif model_type.startswith("llama2"): + args.chat_template = '{input}' + elif model_type.startswith("qwen2"): + args.chat_template = '<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n' else: - if model_type.startswith("phi2") or model_type.startswith("phi3"): - args.chat_template = '<|user|>\n{input} <|end|>\n<|assistant|>' - elif model_type.startswith("phi4"): - args.chat_template = '<|im_start|>user<|im_sep|>\n{input}<|im_end|>\n<|im_start|>assistant<|im_sep|>' - elif model_type.startswith("llama3"): - args.chat_template = '<|start_header_id|>user<|end_header_id|>\n{input}<|eot_id|><|start_header_id|>assistant<|end_header_id|>' - elif model_type.startswith("llama2"): - args.chat_template = '{input}' - elif model_type.startswith("qwen2"): - args.chat_template = '<|im_start|>user\n{input}<|im_end|>\n<|im_start|>assistant\n' - else: - raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") + raise ValueError(f"Chat Template for model type {model_type} is not known. Please provide chat template using --chat_template") params = og.GeneratorParams(model) params.set_search_options(**search_options) From d74a4582080047f6b1831edbb084da08d1881905 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 20:58:29 +0000 Subject: [PATCH 09/13] Fix typo in system prompt condition --- examples/python/model-chat.py | 2 +- examples/python/model-qa.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index d60dd25cb..0752a2230 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -69,7 +69,7 @@ def main(args): if args.verbose: print("Generator created") # Set system prompt - if "<|" in args.system_prompt and "|>" in args.chat_template: + if "<|" in args.system_prompt and "|>" in args.system_prompt: # User-provided system template already has tags system_prompt = args.system_prompt else: diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index b5ff59380..efccf2d48 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -61,7 +61,7 @@ def main(args): generator = og.Generator(model, params) # Set system prompt - if "<|" in args.system_prompt and "|>" in args.chat_template: + if "<|" in args.system_prompt and "|>" in args.system_prompt: # User-provided system template already has tags system_prompt = args.system_prompt else: From 057f82b1c61ce5188fde348c2d9db1fcedc002ce Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 21:15:51 +0000 Subject: [PATCH 10/13] Try lowering transformers version for CIs --- test/python/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/python/requirements.txt b/test/python/requirements.txt index 9735643fe..5a176ff99 100644 --- a/test/python/requirements.txt +++ b/test/python/requirements.txt @@ -6,5 +6,5 @@ protobuf==5.27 sympy pytest onnx -transformers +transformers<4.48.2 huggingface_hub[cli] From e880ca964993fd29bf5831dfa75ed81154f2966a Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 21:31:27 +0000 Subject: [PATCH 11/13] Try lowering Hugging Face hub version for CIs --- test/python/requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/python/requirements.txt b/test/python/requirements.txt index 5a176ff99..d62a8106c 100644 --- a/test/python/requirements.txt +++ b/test/python/requirements.txt @@ -6,5 +6,5 @@ protobuf==5.27 sympy pytest onnx -transformers<4.48.2 -huggingface_hub[cli] +transformers +huggingface_hub[cli]<0.28.1 From 53f1432de300917a0bd0197f4c8aad52ba2938a0 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 22:26:43 +0000 Subject: [PATCH 12/13] Add actual HF token --- .github/workflows/linux-cpu-x64-build.yml | 5 +---- .github/workflows/linux-cpu-x64-nightly-build.yml | 5 +---- .github/workflows/linux-gpu-x64-build.yml | 5 +---- .github/workflows/mac-cpu-arm64-build.yml | 2 +- .github/workflows/win-cpu-x64-build.yml | 5 +---- .github/workflows/win-cuda-x64-build.yml | 5 +---- test/python/requirements.txt | 2 +- 7 files changed, 7 insertions(+), 22 deletions(-) diff --git a/.github/workflows/linux-cpu-x64-build.yml b/.github/workflows/linux-cpu-x64-build.yml index 5fc97369d..9f0daef4e 100644 --- a/.github/workflows/linux-cpu-x64-build.yml +++ b/.github/workflows/linux-cpu-x64-build.yml @@ -10,6 +10,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" @@ -84,10 +85,6 @@ jobs: python3 -m pip install -r test/python/cpu/ort/requirements.txt --user python3 -m pip install --user --no-index --no-deps --find-links build/cpu/wheel onnxruntime_genai - - name: Use Dummy HuggingFace Token - run: | - echo "HF_TOKEN=12345" >> $GITHUB_ENV - - name: Verify Build Artifacts if: always() continue-on-error: true diff --git a/.github/workflows/linux-cpu-x64-nightly-build.yml b/.github/workflows/linux-cpu-x64-nightly-build.yml index 61be5eb6f..0020c0889 100644 --- a/.github/workflows/linux-cpu-x64-nightly-build.yml +++ b/.github/workflows/linux-cpu-x64-nightly-build.yml @@ -12,6 +12,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} ort_dir: "onnxruntime-linux-x64-1.18.0" ort_zip: "onnxruntime-linux-x64-1.18.0.tgz" ort_url: "https://github.com/microsoft/onnxruntime/releases/download/v1.18.0/onnxruntime-linux-x64-1.18.0.tgz" @@ -55,10 +56,6 @@ jobs: python3 -m pip install -r test/python/cpu/ort/requirements.txt --user python3 -m pip install build/cpu/wheel/onnxruntime_genai*.whl --no-deps - - name: Use Dummy HuggingFace Token - run: | - echo "HF_TOKEN=12345" >> $GITHUB_ENV - - name: Run the python tests run: | python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models --e2e diff --git a/.github/workflows/linux-gpu-x64-build.yml b/.github/workflows/linux-gpu-x64-build.yml index e5a397a73..ccc93b0f8 100644 --- a/.github/workflows/linux-gpu-x64-build.yml +++ b/.github/workflows/linux-gpu-x64-build.yml @@ -12,6 +12,7 @@ concurrency: cancel-in-progress: true env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime.Gpu.Linux&api-version=6.0-preview.1" ORT_PACKAGE_NAME: Microsoft.ML.OnnxRuntime.Gpu.Linux ORT_NIGHTLY_SOURCE: "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT-Nightly/nuget/v3/index.json" @@ -109,10 +110,6 @@ jobs: bash -c " \ /usr/bin/cmake --build --preset linux_gcc_cuda_release" - - name: Use Dummy HuggingFace Token - run: | - echo "HF_TOKEN=12345" >> $GITHUB_ENV - - name: Install the onnxruntime-genai Python wheel and run python test run: | echo "Installing the onnxruntime-genai Python wheel and running the Python tests" diff --git a/.github/workflows/mac-cpu-arm64-build.yml b/.github/workflows/mac-cpu-arm64-build.yml index 658f7a660..1adf64a98 100644 --- a/.github/workflows/mac-cpu-arm64-build.yml +++ b/.github/workflows/mac-cpu-arm64-build.yml @@ -10,6 +10,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" jobs: @@ -86,7 +87,6 @@ jobs: - name: Run the python tests run: | source genai-macos-venv/bin/activate - export HF_TOKEN="12345" export ORTGENAI_LOG_ORT_LIB=1 python3 -m pip install requests python3 test/python/test_onnxruntime_genai.py --cwd test/python --test_models test/test_models diff --git a/.github/workflows/win-cpu-x64-build.yml b/.github/workflows/win-cpu-x64-build.yml index 3374a3b6d..3264a465e 100644 --- a/.github/workflows/win-cpu-x64-build.yml +++ b/.github/workflows/win-cpu-x64-build.yml @@ -11,6 +11,7 @@ concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} binaryDir: 'build/cpu/win-x64' ORT_NIGHTLY_REST_API: "https://feeds.dev.azure.com/aiinfra/PublicPackages/_apis/packaging/Feeds/ORT-Nightly/packages?packageNameQuery=Microsoft.ML.OnnxRuntime&api-version=6.0-preview.1" ORT_PACKAGE_NAME: "Microsoft.ML.OnnxRuntime" @@ -91,10 +92,6 @@ jobs: python3 -m pip install -r test\python\cpu\ort\requirements.txt --user python3 -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps - - name: Use Dummy HuggingFace Token - run: | - Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=12345" - - name: Run the Python Tests run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" diff --git a/.github/workflows/win-cuda-x64-build.yml b/.github/workflows/win-cuda-x64-build.yml index 5cda163d1..273560b9f 100644 --- a/.github/workflows/win-cuda-x64-build.yml +++ b/.github/workflows/win-cuda-x64-build.yml @@ -12,6 +12,7 @@ concurrency: cancel-in-progress: true env: + HF_TOKEN: ${{ secrets.HF_TOKEN }} AZCOPY_AUTO_LOGIN_TYPE: MSI AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4 cuda_dir: "${{ github.workspace }}\\cuda_sdk" @@ -80,10 +81,6 @@ jobs: python -m pip install -r test\python\cuda\ort\requirements.txt python -m pip install (Get-ChildItem ("$env:binaryDir\wheel\*.whl")) --no-deps - - name: Use Dummy HuggingFace Token - run: | - Add-Content -Path $env:GITHUB_ENV -Value "HF_TOKEN=12345" - - name: Run the Python Tests run: | python test/python/test_onnxruntime_genai.py --cwd "test\python" --test_models "test\test_models" --e2e diff --git a/test/python/requirements.txt b/test/python/requirements.txt index d62a8106c..9735643fe 100644 --- a/test/python/requirements.txt +++ b/test/python/requirements.txt @@ -7,4 +7,4 @@ sympy pytest onnx transformers -huggingface_hub[cli]<0.28.1 +huggingface_hub[cli] From e3ed1bc2ec30edeb6d852f55ed7525dc4b9e38c7 Mon Sep 17 00:00:00 2001 From: Kunal Vaishnavi Date: Fri, 31 Jan 2025 22:47:19 +0000 Subject: [PATCH 13/13] Always format chat template --- examples/python/model-chat.py | 6 +----- examples/python/model-qa.py | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/examples/python/model-chat.py b/examples/python/model-chat.py index 0752a2230..6068d7524 100644 --- a/examples/python/model-chat.py +++ b/examples/python/model-chat.py @@ -99,11 +99,7 @@ def main(args): if args.timings: started_timestamp = time.time() - # If there is a chat template, use it - prompt = text - if args.chat_template: - prompt = f'{args.chat_template.format(input=text)}' - + prompt = f'{args.chat_template.format(input=text)}' input_tokens = tokenizer.encode(prompt) generator.append_tokens(input_tokens) diff --git a/examples/python/model-qa.py b/examples/python/model-qa.py index efccf2d48..e5e449a77 100644 --- a/examples/python/model-qa.py +++ b/examples/python/model-qa.py @@ -91,11 +91,7 @@ def main(args): if args.timings: started_timestamp = time.time() - # If there is a chat template, use it - prompt = text - if args.chat_template: - prompt = f'{args.chat_template.format(input=text)}' - + prompt = f'{args.chat_template.format(input=text)}' input_tokens = tokenizer.encode(prompt) generator.append_tokens(input_tokens)