
Commit

Merge branch 'main' into aciddelgado/fix_top_k
aciddelgado committed Jan 21, 2025
2 parents 601c261 + 471e715 commit e8ad028
Showing 6 changed files with 17 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -15,7 +15,7 @@ See documentation at https://onnxruntime.ai/docs/genai.

| Support matrix | Supported now | Under development | On the roadmap |
| -------------- | ------------- | ----------------- | -------------- |
-| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> | Whisper | Stable diffusion |
+| Model architectures | Gemma <br/> Llama * <br/> Mistral + <br/> Phi (language + vision) <br/> Qwen <br/> Nemotron <br/> Granite <br/> AMD OLMo | Whisper | Stable diffusion |
| API | Python <br/> C# <br/> C/C++ <br/> Java ^ | Objective-C | |
| Platform | Linux <br/> Windows <br/> Mac ^ <br/> Android ^ | | iOS |
| Architecture | x86 <br/> x64 <br/> Arm64 ~ | | |
2 changes: 1 addition & 1 deletion src/models/model.cpp
@@ -590,7 +590,7 @@ std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, const char* config_path, con
}

std::shared_ptr<Model> CreateModel(OrtEnv& ort_env, std::unique_ptr<Config> config) {
-std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
+std::set<std::string> llm_types = {"chatglm", "decoder", "gemma", "gemma2", "granite", "llama", "mistral", "nemotron", "olmo", "phi", "phimoe", "phi3", "phi3small", "qwen2"};
if (config->model.type == "gpt2")
return std::make_shared<Gpt_Model>(std::move(config), ort_env);
if (llm_types.find(config->model.type) != llm_types.end())
2 changes: 1 addition & 1 deletion src/ort_genai_c.cpp
@@ -365,7 +365,7 @@ OgaResult* OGA_API_CALL OgaGenerator_GetOutput(const OgaGenerator* oga_generator
throw std::runtime_error("Unexpected error. Trying to access DML memory but the project is not compiled with DML.");
#endif
} else {
-throw std::runtime_error("Unsupported device type: " + static_cast<int>(device_type));
+throw std::runtime_error("Unsupported device type: " + std::to_string(static_cast<int>(device_type)));
}

auto tensor = std::make_shared<Generators::Tensor>(std::move(ortvalue_clone));
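
For context on the ort_genai_c.cpp change above: in C++, adding an integer directly to a string literal performs pointer arithmetic on the underlying `const char*` rather than concatenation, so the old code produced a truncated (or out-of-bounds) error message; converting the value with `std::to_string` first makes the `+` a proper string concatenation.
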
1 change: 1 addition & 0 deletions src/python/py/models/README.md
@@ -39,6 +39,7 @@ The tool currently supports the following model architectures.
- Nemotron
- Phi
- Qwen
+- AMD OLMo

It is intended for supporting the latest, popular state-of-the-art models.

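
As a usage illustration for the architecture list above, a model listed here is exported through the `builder.py` script changed below. The following is a hypothetical sketch only: the module path `onnxruntime_genai.models.builder`, the trailing `cache_dir` parameter, the empty `input_path` convention, and the chosen `precision`/`execution_provider` values are assumptions inferred from the call sites visible in this diff, not confirmed by it.

```python
# Hypothetical sketch: exporting the newly supported AMD OLMo checkpoint with the
# model builder. The import path and the cache_dir parameter are assumptions; only
# the leading parameters of create_model are visible in this diff.
from onnxruntime_genai.models.builder import create_model

create_model(
    model_name="amd/AMD-OLMo-1B-SFT-DPO",  # Hugging Face ID added to the test paths below
    input_path="",                          # assumed: empty means "download from the Hub"
    output_dir="./amd-olmo-onnx",
    precision="int4",
    execution_provider="cpu",
    cache_dir="./hf_cache",
)
```
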
12 changes: 11 additions & 1 deletion src/python/py/models/builder.py
@@ -330,7 +330,7 @@ def make_genai_config(self, model_name_or_path, extra_kwargs, out_dir):

genai_config = {
"model": {
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") else 1, # config.bos_token_id not present in ChatGLM model configs.
"bos_token_id": config.bos_token_id if hasattr(config, "bos_token_id") and config.bos_token_id != None else 1, # config.bos_token_id not present in ChatGLM model configs.
"context_length": self.context_length,
"decoder": {
"session_options" : {
@@ -3068,6 +3068,14 @@ def make_layer(self, layer_id, layer)
layer.self_attn = layer.self_attn if hasattr(layer, 'self_attn') else layer.self_attention
super().make_layer(layer_id, layer)

+class OLMoModel(Model):
+def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
+super().__init__(config, io_dtype, onnx_dtype, ep, cache_dir, extra_options)
+
+def make_layernorm(self, layer_id, layernorm, skip, simple, location):
+layernorm.weight = torch.ones(self.hidden_size)
+layernorm.bias = torch.zeros(self.hidden_size)
+super().make_layernorm(layer_id, layernorm, skip, simple, location)

class GraniteModel(MistralModel):
def __init__(self, config, io_dtype, onnx_dtype, ep, cache_dir, extra_options):
@@ -3200,6 +3208,8 @@ def create_model(model_name, input_path, output_dir, precision, execution_provid
onnx_model = MistralModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "NemotronForCausalLM":
onnx_model = NemotronModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "OlmoForCausalLM":
onnx_model = OLMoModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "PhiForCausalLM":
onnx_model = PhiModel(config, io_dtype, precision, execution_provider, cache_dir, extra_options)
elif config.architectures[0] == "Phi3ForCausalLM" and config.max_position_embeddings == config.original_max_position_embeddings:
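
On the new `OLMoModel` class above: OLMo-family checkpoints use a non-parametric LayerNorm, so their layer-norm modules carry no learned scale or shift for the builder's generic `make_layernorm` path to serialize; the override fabricates an identity scale (ones) and a zero shift, which leaves the normalization numerically unchanged. A small, illustrative PyTorch check of that equivalence (not part of the commit):

```python
# Illustrative check: LayerNorm with weight = ones and bias = zeros matches a
# non-parametric LayerNorm, which is what the OLMoModel override relies on.
import torch

hidden_size = 16
x = torch.randn(2, hidden_size)

non_parametric = torch.nn.LayerNorm(hidden_size, elementwise_affine=False)

parametric = torch.nn.LayerNorm(hidden_size)
with torch.no_grad():
    parametric.weight.copy_(torch.ones(hidden_size))   # what the override supplies
    parametric.bias.copy_(torch.zeros(hidden_size))

assert torch.allclose(non_parametric(x), parametric(x))
print("outputs match")
```
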
2 changes: 2 additions & 0 deletions test/python/_test_utils.py
@@ -55,6 +55,8 @@ def run_subprocess(
def get_model_paths():
hf_paths = {
"phi-2": "microsoft/phi-2",
"olmo": "amd/AMD-OLMo-1B-SFT-DPO",
"qwen": "Qwen/Qwen2.5-0.5B",
"phi-3.5": "microsoft/Phi-3.5-mini-instruct",
# "llama-3.2": "meta-llama/Llama-3.2-1B-instruct",
"granite-3.0": "ibm-granite/granite-3.0-2b-instruct",
