Skip to content

Commit

Permalink
Remove the default max_tokens for /v1/chat/completions (#251)
Browse files — browse the repository at this point in the history
  • Loading branch information
schoi-habana authored Dec 16, 2024
1 parent cc2ca4a commit 61309b2
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ authors = ["Olivier Dehaene"]
homepage = "https://github.com/huggingface/text-generation-inference"

[workspace.dependencies]
tokenizers = { version = "0.19.1", features = ["http"] }
tokenizers = { version = "0.20.0", features = ["http"] }
hf-hub = { version = "0.3.1", features = ["tokio"] }

[profile.release]
Expand Down
2 changes: 1 addition & 1 deletion benchmark/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
// We need to download it outside of the Tokio runtime
let params = FromPretrainedParameters {
revision,
auth_token,
token: auth_token,
..Default::default()
};
Tokenizer::from_pretrained(tokenizer_name.clone(), Some(params)).unwrap()
Expand Down
6 changes: 2 additions & 4 deletions router/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -608,7 +608,6 @@ async fn completions(
..
} = req;

let max_new_tokens = max_tokens.or(Some(100));
let stop = stop.unwrap_or_default();
// enable greedy only when temperature is 0
let (do_sample, temperature) = match temperature {
Expand Down Expand Up @@ -657,7 +656,7 @@ async fn completions(
top_p: req.top_p,
typical_p: None,
do_sample,
max_new_tokens,
max_new_tokens: max_tokens,
return_full_text: None,
stop: stop.clone(),
truncate: None,
Expand Down Expand Up @@ -1019,7 +1018,6 @@ async fn chat_completions(
} = req;

let repetition_penalty = presence_penalty.map(|x| x + 2.0);
let max_new_tokens = max_tokens.or(Some(100));
let logprobs = logprobs.unwrap_or(false);
let tool_prompt = tool_prompt.unwrap_or_default();
let stop = stop.unwrap_or_default();
Expand Down Expand Up @@ -1081,7 +1079,7 @@ async fn chat_completions(
top_p: req.top_p,
typical_p: None,
do_sample,
max_new_tokens,
max_new_tokens: max_tokens,
return_full_text: None,
stop,
truncate: None,
Expand Down

0 comments on commit 61309b2

Please sign in to comment.