diff --git a/src/modules/taskyon/chat.ts b/src/modules/taskyon/chat.ts index 9401a09b..f0e120aa 100644 --- a/src/modules/taskyon/chat.ts +++ b/src/modules/taskyon/chat.ts @@ -443,12 +443,12 @@ export function mapFunctionNames( return toolNames?.map((t) => tools[t]); } -async function getOpenRouterGenerationInfo( +export async function getOpenRouterGenerationInfo( generationId: string, headers: Record<string, string> ) { let retryCount = 0; - let delay = 5000; // initial delay + let delay = 5000; // first delay while (retryCount < 3) { const response = await fetch( @@ -459,8 +459,9 @@ async function getOpenRouterGenerationInfo( ); if (response.ok) { - const generationInfo = - (await response.json()) as OpenRouterGenerationInfo; + const generationInfo = (await response.json()) as { + data: OpenRouterGenerationInfo; + }; console.log('received generation info for task'); return generationInfo.data; } else if (response.status === 404) { @@ -479,19 +480,11 @@ async function getOpenRouterGenerationInfo( ); } -export async function enrichWithDelayedUsageInfos( - generationId: string, - headers: Record<string, string>, +export function enrichWithDelayedUsageInfos( task: LLMTask, - taskManager: TyTaskManager + taskManager: TyTaskManager, + generationInfo: OpenRouterGenerationInfo ) { - // TODO: if we get a 404.. 
try again sometimes the dat just isn't there yet ;) - await sleep(5000); - const generationInfo = await getOpenRouterGenerationInfo( - generationId, - headers - ); - if (generationInfo) { if ( generationInfo.native_tokens_completion && diff --git a/src/modules/taskyon/taskWorker.ts b/src/modules/taskyon/taskWorker.ts index f43a8b54..8f5afb3f 100644 --- a/src/modules/taskyon/taskWorker.ts +++ b/src/modules/taskyon/taskWorker.ts @@ -6,6 +6,7 @@ import { estimateChatTokens, generateHeaders, getApiConfigCopy, + getOpenRouterGenerationInfo, } from './chat'; import { renderTasks4Chat } from './promptCreation'; import { @@ -15,6 +16,7 @@ import { LLMTask, TaskResult, StructuredResponse, + OpenRouterGenerationInfo, } from './types'; import { addTask2Tree, processTasksQueue } from './taskManager'; import type { OpenAI } from 'openai'; @@ -22,8 +24,7 @@ import { TyTaskManager } from './taskManager'; import { Tool, handleFunctionExecution } from './tools'; import { ToolBase } from './types'; import { dump, load } from 'js-yaml'; -import { deepMerge } from './utils'; -import { string } from 'zod'; +import { deepMerge, sleep } from './utils'; function isOpenAIFunctionCall( choice: OpenAI.ChatCompletion['choices'][0] @@ -188,13 +189,46 @@ export async function processChatTask( // get llm inference stats // TODO: we should replace this with an inference task which has the LLM as a parent... 
- if (chatCompletion && chatState.selectedApi == 'openrouter.ai') { - void enrichWithDelayedUsageInfos( - chatCompletion.id, - generateHeaders(apiKey, chatState.siteUrl, chatState.selectedApi), - task, - taskManager - ); + if (chatCompletion && chatState.selectedApi === 'openrouter.ai') { + void sleep(5000).then(() => { + void getOpenRouterGenerationInfo( + chatCompletion.id, + generateHeaders(apiKey, chatState.siteUrl, chatState.selectedApi) + ).then((generationInfo) => { + enrichWithDelayedUsageInfos(task, taskManager, generationInfo); + }); + }); + } else if (chatCompletion && chatState.selectedApi === 'taskyon') { + // TODO: cancel this section, if we're not logged in to taskyon... + void sleep(5000).then(() => { + const headers = generateHeaders( + apiKey, + chatState.siteUrl, + api.name + ); + const baseUrl = new URL(api.baseURL).origin; + console.log('get generation info from ', baseUrl); + const url = `${baseUrl}/rest/v1/api_usage_log?select=reference_data&id=eq.${chatCompletion.id}`; + void fetch(url, { headers }) + .then((response) => { + if (!response.ok) { + throw new Error( + `Could not find generation information for task ${task.id}` + ); + } + return response.json() as Promise< + { reference_data: OpenRouterGenerationInfo }[] + >; + }) + .then((data) => { + console.log('taskyon generation info:', data); + enrichWithDelayedUsageInfos( + task, + taskManager, + data[0].reference_data + ); + }); + }); } else if (chatCompletion?.usage) { // openai sends back the exact number of prompt tokens :) task.debugging.promptTokens = chatCompletion.usage.prompt_tokens; diff --git a/src/modules/taskyon/types.ts b/src/modules/taskyon/types.ts index 8d6ec207..c5ef8a42 100644 --- a/src/modules/taskyon/types.ts +++ b/src/modules/taskyon/types.ts @@ -67,28 +67,26 @@ export type ChatCompletionResponse = { }; export interface OpenRouterGenerationInfo { - data: { - id: string; - total_cost: number; - created_at: string; // ISO 8601 date string - model: string; - app_id: 
number; - streamed: boolean; - cancelled: boolean; - provider_name: string; - latency: number; - moderation_latency: null | number; // can be null - generation_time: number; - finish_reason: string; - tokens_prompt: number; - tokens_completion: number; - native_tokens_prompt: number; - native_tokens_completion: number; - num_media_prompt: null | number; // can be null - num_media_completion: null | number; // can be null - origin: string; - usage: number; - }; + id: string; + total_cost: number; + created_at: string; // ISO 8601 date string + model: string; + app_id: number; + streamed: boolean; + cancelled: boolean; + provider_name: string; + latency: number; + moderation_latency: null | number; // can be null + generation_time: number; + finish_reason: string; + tokens_prompt: number; + tokens_completion: number; + native_tokens_prompt: number; + native_tokens_completion: number; + num_media_prompt: null | number; // can be null + num_media_completion: null | number; // can be null + origin: string; + usage: number; } export interface JSONSchemaForFunctionParameter {