From d0c1cf0b2dc31d00adf529f4cd341b05889771a4 Mon Sep 17 00:00:00 2001
From: Highbyte
Date: Wed, 2 Oct 2024 22:12:39 +0200
Subject: [PATCH] WIP: Fix some issues in WASM self-hosted coding assistant config.

---
 .../Emulator/SystemSetup/C64Setup.cs          | 50 +++++++++----------
 .../Pages/Commodore64/C64ConfigUI.razor       |  8 +--
 .../Inference/CodeCompletionInference.cs      | 11 +++-
 .../Inference/OpenAI/ApiConfig.cs             | 11 ++++
 .../OpenAI/DisableActivityHandler.cs          | 37 ++++++++++++++
 .../OpenAI/OpenAIInferenceBackend.cs          |  9 +++-
 .../OpenAI/SelfHostedLlmTransport.cs          | 19 +++++--
 .../Highbyte.DotNet6502.AI.csproj             |  2 +-
 8 files changed, 110 insertions(+), 37 deletions(-)
 create mode 100644 src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs

diff --git a/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs b/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs
index e953f096..33243b1c 100644
--- a/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs
+++ b/src/apps/Highbyte.DotNet6502.App.WASM/Emulator/SystemSetup/C64Setup.cs
@@ -241,21 +241,21 @@ public static async Task GetCodeSuggestionImplementation(C64Hos
     public static async Task<ApiConfig> GetOpenAIConfig(ILocalStorageService localStorageService)
     {
         var apiKey = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:ApiKey");
+
         var deploymentName = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:DeploymentName");
         if (string.IsNullOrEmpty(deploymentName))
-            deploymentName = "gpt-4o"; // Default to a model that works well
-
-        var endpoint = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:Endpoint");
-        Uri.TryCreate(endpoint, UriKind.Absolute, out var endPointUri);
+            deploymentName = "gpt-4o"; // Default to an OpenAI model that works well
 
-        var selfHosted = await localStorageService.GetItemAsync<bool>($"{ApiConfig.CONFIG_SECTION}:SelfHosted");
+        // For future use: Endpoint can be set if OpenAI is accessed via an Azure endpoint.
+        //var endpoint = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:Endpoint");
+        //Uri.TryCreate(endpoint, UriKind.Absolute, out var endPointUri);
 
         var apiConfig = new ApiConfig()
         {
             ApiKey = apiKey, // API key for OpenAI (required), Azure OpenAI (required), or SelfHosted (optional).
             DeploymentName = deploymentName, // AI model name
-            Endpoint = endPointUri, // Used if using Azure OpenAI, or SelfHosted
-            SelfHosted = selfHosted, // Set to true to use self-hosted OpenAI API compatible endpoint.
+            //Endpoint = endPointUri, // Used if using Azure OpenAI
+            SelfHosted = false,
         };
         return apiConfig;
     }
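Note: a minimal usage sketch of the hosted-OpenAI path above. The enclosing class name C64Setup and the localStorageService instance are assumptions for illustration; they are not part of the patch.

    // Hypothetical caller code (assumes Blazored.LocalStorage's ILocalStorageService).
    var openAIConfig = await C64Setup.GetOpenAIConfig(localStorageService);
    // With no stored DeploymentName the config falls back to "gpt-4o",
    // and SelfHosted is now always false on this code path.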
@@ -263,9 +263,11 @@ public static async Task GetOpenAIConfig(ILocalStorageService localSt
     public static async Task<ApiConfig> GetSelfHostedOpenAICompatibleConfig(ILocalStorageService localStorageService)
     {
         var apiKey = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:ApiKey");
+        if (apiKey == string.Empty)
+            apiKey = null;
         var deploymentName = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:DeploymentName");
         if (string.IsNullOrEmpty(deploymentName))
-            deploymentName = "stable-code:3b-code-q4_0"; // Default to a Ollama model that is optimized for code completion
+            deploymentName = "codellama:13b"; // Default to an Ollama model that (sometimes) works... TODO: Improve parsing of the response (which does not seem as exact as from OpenAI models), or improve the prompt with examples?
         var endpoint = await localStorageService.GetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:Endpoint");
         if (string.IsNullOrEmpty(endpoint))
             endpoint = "http://localhost:11434/api"; // Default to local Ollama
@@ -281,21 +283,6 @@ public static async Task GetSelfHostedOpenAICompatibleConfig(ILocalSt
         return apiConfig;
     }
-
-    public static async Task SaveOpenAICodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, ApiConfig apiConfig)
-    {
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:ApiKey", apiConfig.ApiKey);
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:DeploymentName", apiConfig.DeploymentName);
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:Endpoint", apiConfig.Endpoint != null ? apiConfig.Endpoint.OriginalString : "");
-    }
-
-    public static async Task SaveSelfHostedOpenAICompatibleCodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, ApiConfig apiConfig)
-    {
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:ApiKey", !string.IsNullOrEmpty(apiConfig.ApiKey) ? apiConfig.ApiKey : string.Empty);
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:DeploymentName", !string.IsNullOrEmpty(apiConfig.DeploymentName) ? apiConfig.DeploymentName : string.Empty);
-        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:Endpoint", apiConfig.Endpoint != null ? apiConfig.Endpoint.OriginalString : "");
-    }
-
     public static async Task<CustomAIEndpointConfig> GetCustomAIEndpointConfig(ILocalStorageService localStorageService)
     {
         var apiKey = await localStorageService.GetItemAsStringAsync($"{CustomAIEndpointConfig.CONFIG_SECTION}:ApiKey");
@@ -315,9 +302,22 @@ public static async Task GetCustomAIEndpointConfig(ILoca
         return apiConfig;
     }
 
+    public static async Task SaveOpenAICodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, ApiConfig apiConfig)
+    {
+        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:ApiKey", apiConfig.ApiKey ?? string.Empty);
+        //await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION}:Endpoint", apiConfig.Endpoint != null ? apiConfig.Endpoint.OriginalString : string.Empty);
+    }
+
+    public static async Task SaveSelfHostedOpenAICompatibleCodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, ApiConfig apiConfig)
+    {
+        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:ApiKey", apiConfig.ApiKey ?? string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:DeploymentName", apiConfig.DeploymentName ?? string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{ApiConfig.CONFIG_SECTION_SELF_HOSTED}:Endpoint", apiConfig.Endpoint != null ? apiConfig.Endpoint.OriginalString : string.Empty);
+    }
+
     public static async Task SaveCustomCodingAssistantConfigToLocalStorage(ILocalStorageService localStorageService, CustomAIEndpointConfig customAIEndpointConfig)
     {
-        await localStorageService.SetItemAsStringAsync($"{CustomAIEndpointConfig.CONFIG_SECTION}:ApiKey", customAIEndpointConfig.ApiKey);
-        await localStorageService.SetItemAsStringAsync($"{CustomAIEndpointConfig.CONFIG_SECTION}:Endpoint", customAIEndpointConfig.Endpoint != null ? customAIEndpointConfig.Endpoint.OriginalString : "");
+        await localStorageService.SetItemAsStringAsync($"{CustomAIEndpointConfig.CONFIG_SECTION}:ApiKey", customAIEndpointConfig.ApiKey ?? string.Empty);
+        await localStorageService.SetItemAsStringAsync($"{CustomAIEndpointConfig.CONFIG_SECTION}:Endpoint", customAIEndpointConfig.Endpoint != null ? customAIEndpointConfig.Endpoint.OriginalString : string.Empty);
     }
 }
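Note: how the load/save pair for the self-hosted path is intended to round-trip, as a sketch (same assumed C64Setup class and service instance as above; not part of the patch):

    // Illustrative round-trip.
    var selfHostedConfig = await C64Setup.GetSelfHostedOpenAICompatibleConfig(localStorageService);
    selfHostedConfig.DeploymentName = "codellama:13b";                  // Ollama model name
    selfHostedConfig.Endpoint = new Uri("http://localhost:11434/api"); // local Ollama endpoint
    await C64Setup.SaveSelfHostedOpenAICompatibleCodingAssistantConfigToLocalStorage(localStorageService, selfHostedConfig);
    // Save writes string.Empty instead of null, and load converts an empty ApiKey back
    // to null, so the optional API key for self-hosted endpoints round-trips cleanly.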
diff --git a/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor b/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor
index 353bbc4a..c912496d 100644
--- a/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor
+++ b/src/apps/Highbyte.DotNet6502.App.WASM/Pages/Commodore64/C64ConfigUI.razor
@@ -208,17 +208,17 @@
     @if (@C64HostConfig.CodeSuggestionBackendType == CodeSuggestionBackendTypeEnum.SelfHostedOpenAICompatible)
     {
         <div class="table-row">
-            Self-hosted OpenAI compatible endpoint (ex: Ollama on http://localhost:11434/api)
+            Self-hosted OpenAI compatible endpoint
             @if (_selfHostedOpenAICompatibleAIApiConfig != null)
             {
-                @* *@
-            }
+
+            }
         </div>
         <div class="table-row">
-            Model name (ex: stable-code:3b-code-q4_0)
+            Model name
             @if (_selfHostedOpenAICompatibleAIApiConfig != null)
             {

diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs
index b8bef3f6..fa98e0e7 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/CodeCompletionInference.cs
@@ -15,7 +15,7 @@ Predict what text the user in would insert at the cursor position indicated by ^
 Do not make up new information. If you're not sure, just reply with NO_PREDICTION.
 
 RULES:
-1. Reply with OK:,then in square brackets (with not preceeding space) the predicted text, then END_INSERTION, and no other output.
+1. Reply with OK:, then in square brackets [] (without a preceding space) the predicted text, then END_INSERTION, and no other output.
 2. If there isn't enough information to predict any words that the user would type next, just reply with the word NO_PREDICTION.
 3. NEVER invent new information. If you can't be sure what the user is about to type, ALWAYS stop the prediction with END_INSERTION.");
 
@@ -51,8 +51,15 @@ public virtual async Task GetInsertionSuggestionAsync(IInferenceBackend
     {
         var chatOptions = BuildPrompt(config, textBefore, textAfter);
         var response = await inference.GetChatResponseAsync(chatOptions);
-        if (response.Length > 5 && response.StartsWith("OK:[", StringComparison.Ordinal))
+
+        if (response.Length > 5 &&
+            (response.StartsWith("OK:[", StringComparison.Ordinal)
+            || response.StartsWith("OK: [", StringComparison.Ordinal)))
         {
+            // Some tested Ollama models' responses start with "OK: [", some with "OK:[" (even though the prompt doesn't have a space)
+            if (response.StartsWith("OK: [", StringComparison.Ordinal))
+                response = response.Replace("OK: [", "OK:[");
+
             // Avoid returning multiple sentences as it's unlikely to avoid inventing some new train of thought.
             var trimAfter = response.IndexOfAny(['.', '?', '!']);
             if (trimAfter > 0 && response.Length > trimAfter + 1 && response[trimAfter + 1] == ' ')
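Note: the TODO above asks for more robust parsing of self-hosted model responses. One possible direction (a sketch only; NormalizeOkPrefix is a hypothetical helper, not patch code) is to normalize the prefix with a regex instead of enumerating literal variants:

    using System.Text.RegularExpressions;

    // Accepts "OK:[", "OK: [", "OK:  [" etc. and normalizes to the exact "OK:["
    // form the existing parsing expects; returns null to mean NO_PREDICTION.
    static string? NormalizeOkPrefix(string response)
    {
        var match = Regex.Match(response, @"^OK:\s*\[");
        return match.Success
            ? "OK:[" + response.Substring(match.Length)
            : null;
    }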
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs
index 9b9603c8..60cbac5c 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/ApiConfig.cs
@@ -8,6 +8,17 @@ public class ApiConfig
 {
     public string? ApiKey { get; set; }
     public string? DeploymentName { get; set; }
+    public string EndpointString
+    {
+        get
+        {
+            return Endpoint?.ToString() ?? string.Empty;
+        }
+        set
+        {
+            Endpoint = string.IsNullOrWhiteSpace(value) ? null : new Uri(value);
+        }
+    }
     public Uri? Endpoint { get; set; }
 
     public bool SelfHosted { get; set; }

diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs
new file mode 100644
index 00000000..5bd5797f
--- /dev/null
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/DisableActivityHandler.cs
@@ -0,0 +1,37 @@
+using System.Diagnostics;
+
+namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
+public class DisableActivityHandler : DelegatingHandler
+{
+    /// <summary>
+    /// Distributed tracing headers that are set automatically by .NET but that the local Ollama API's CORS rules don't allow.
+    /// </summary>
+    static readonly List<string> s_HeadersToRemove = new List<string>
+    {
+        "x-ms-client-request-id",
+        "x-ms-return-client-request-id"
+    };
+
+    public DisableActivityHandler(HttpMessageHandler innerHandler) : base(innerHandler)
+    {
+
+    }
+
+    protected override async Task<HttpResponseMessage> SendAsync(HttpRequestMessage request, CancellationToken cancellationToken)
+    {
+        // Note: The workaround of setting Activity.Current = null doesn't seem to work. Instead, remove the headers manually below.
+        //Activity.Current = null;
+
+        // Remove the headers listed in s_HeadersToRemove from the request if they are present.
+        foreach (var headerName in s_HeadersToRemove)
+        {
+            if (request.Headers.Contains(headerName))
+            {
+                request.Headers.Remove(headerName);
+                Activity.Current = null;
+            }
+        }
+
+        return await base.SendAsync(request, cancellationToken);
+    }
+}

diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs
index c63d7b19..9577ed35 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/OpenAIInferenceBackend.cs
@@ -1,5 +1,6 @@
 // Based on https://github.com/dotnet/smartcomponents
 
+using System.Net;
 using System.Runtime.InteropServices;
 using Azure;
 using Azure.AI.OpenAI;
@@ -72,7 +73,13 @@ private static OpenAIClient CreateClient(ApiConfig apiConfig)
     {
         if (apiConfig.SelfHosted)
         {
-            var transport = new SelfHostedLlmTransport(apiConfig.Endpoint!);
+            //var transport = new SelfHostedLlmTransport(apiConfig.Endpoint!);
+
+            var httpClientHandler = new HttpClientHandler();
+            var disableActivityHandler = new DisableActivityHandler(httpClientHandler);
+            var httpClient = new HttpClient(disableActivityHandler);
+            var transport = new SelfHostedLlmTransport(apiConfig.Endpoint!, httpClient);
+
             return new OpenAIClient(apiConfig.ApiKey, new() { Transport = transport });
         }
         else if (apiConfig.Endpoint is null)
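Note: to make the wiring above concrete, the resulting HTTP pipeline for self-hosted endpoints looks like this (descriptive sketch, not additional patch code):

    // Outgoing request flow:
    //   OpenAIClient -> SelfHostedLlmTransport (rewrites scheme/host/port)
    //     -> HttpClient -> DisableActivityHandler (strips the x-ms-* tracing headers)
    //       -> HttpClientHandler (network)
    var httpClient = new HttpClient(new DisableActivityHandler(new HttpClientHandler()));
    // Requests sent through this client lose the CORS-problematic headers before they
    // reach a local Ollama server, which is what the browser-hosted (WASM) case needs.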
diff --git a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs
index 03c9bfdf..a01638fc 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs
+++ b/src/libraries/Highbyte.DotNet6502.AI/CodingAssistant/Inference/OpenAI/SelfHostedLlmTransport.cs
@@ -9,13 +9,24 @@ namespace Highbyte.DotNet6502.AI.CodingAssistant.Inference.OpenAI;
 /// Used to resolve queries using Ollama or anything else that exposes an OpenAI-compatible
 /// endpoint with a scheme/host/port set of your choice.
 /// </summary>
-internal class SelfHostedLlmTransport(Uri endpoint) : HttpClientTransport
+internal class SelfHostedLlmTransport : HttpClientTransport
 {
+    private readonly Uri _endpoint;
+
+    internal SelfHostedLlmTransport(Uri endpoint) : base()
+    {
+        _endpoint = endpoint;
+    }
+    internal SelfHostedLlmTransport(Uri endpoint, HttpClient httpClient) : base(httpClient)
+    {
+        _endpoint = endpoint;
+    }
+
     public override ValueTask ProcessAsync(HttpMessage message)
     {
-        message.Request.Uri.Scheme = endpoint.Scheme;
-        message.Request.Uri.Host = endpoint.Host;
-        message.Request.Uri.Port = endpoint.Port;
+        message.Request.Uri.Scheme = _endpoint.Scheme;
+        message.Request.Uri.Host = _endpoint.Host;
+        message.Request.Uri.Port = _endpoint.Port;
         return base.ProcessAsync(message);
     }
 }

diff --git a/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj b/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj
index d15e781a..1aa7b82d 100644
--- a/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj
+++ b/src/libraries/Highbyte.DotNet6502.AI/Highbyte.DotNet6502.AI.csproj
@@ -25,7 +25,7 @@
 
-
+
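Note: the effect of ProcessAsync can be illustrated stand-alone with UriBuilder (an approximation for demonstration only; the transport itself mutates message.Request.Uri in place rather than building a new Uri):

    var original = new Uri("https://api.openai.com/v1/chat/completions");
    var endpoint = new Uri("http://localhost:11434");
    var rewritten = new UriBuilder(original)
    {
        Scheme = endpoint.Scheme, // "http"
        Host = endpoint.Host,     // "localhost"
        Port = endpoint.Port      // 11434
    }.Uri;
    Console.WriteLine(rewritten); // http://localhost:11434/v1/chat/completions
    // The path is preserved, so the self-hosted server must expose the same
    // OpenAI-compatible routes that the SDK targets.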