Skip to content

Commit

Permalink
Correct async tgt, but break drafts
Browse files Browse the repository at this point in the history
  • Loading branch information
AutonomicPerfectionist committed Nov 16, 2023
1 parent 679067d commit 6903128
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions examples/speculative/speculative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ struct seq_draft {

// Bookkeeping for one target-model decode that has been started asynchronously
// but not yet finished. In main(), an entry is filled in next to each call to
// llama_start_async_decode() and consumed by llama_finish_async_decode().
struct seq_async_run {
// graph handle returned by llama_start_async_decode() for this run
struct ggml_cgraph * cgraph;
// the batch this run was started with; handed back to llama_finish_async_decode()
// so the finish step uses the same batch as the start step
// NOTE(review): assigned by value from batch_tgt — confirm whether llama_batch
// owns its buffers or only points at them, in case batch_tgt is modified
// before this run is finished
llama_batch batch;
};

int main(int argc, char ** argv) {
Expand Down Expand Up @@ -220,10 +221,10 @@ int main(int argc, char ** argv) {
int s_keep = 0;

if (!tgt_cgraphs.empty()) {
LOG("Finishing async decode\n");
LOG("Finishing async decode, should_run_async = %d\n", should_run_async);
struct seq_async_run run = tgt_cgraphs.back();
struct ggml_cgraph * cgraph = run.cgraph;
llama_finish_async_decode(*ctx_tgt, batch_tgt, cgraph);
llama_finish_async_decode(*ctx_tgt, run.batch, cgraph);
tgt_cgraphs.pop_back();
}

Expand Down Expand Up @@ -325,6 +326,7 @@ int main(int argc, char ** argv) {

struct seq_async_run run;
run.cgraph = llama_start_async_decode(*ctx_tgt, batch_tgt);
run.batch = batch_tgt;
tgt_cgraphs.push_front(run);
llama_kv_cache_seq_rm(ctx_tgt, 0, n_past_tgt, n_past_tgt + 1);

Expand Down Expand Up @@ -538,6 +540,7 @@ int main(int argc, char ** argv) {

LOG("target batch: %s\n", LOG_BATCH_TOSTR_PRETTY(ctx_tgt, batch_tgt).c_str());
struct seq_async_run run;
run.batch = batch_tgt;
run.cgraph = llama_start_async_decode(*ctx_tgt, batch_tgt);
tgt_cgraphs.push_front(run);

Expand Down

0 comments on commit 6903128

Please sign in to comment.