{
// URL for OpenAI-compatible APIs.
// It must start with http:// or https://, which selects the connection protocol. Use http:// when targeting localhost.
// Selected parts of your code and the prompt will be sent to that URL, so make sure you have all the necessary permissions to do so.
// Example: "http://localhost:11434" (assuming Ollama is running on localhost)
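// A minimal local-server sketch covering this setting and the `token` setting below
// (the address, port, and dummy token are assumptions for illustration; adjust to your setup):
// "url": "http://localhost:11434",
// "token": "local-dummy-token", // any string of 10+ characters works for servers that skip auth checks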
"url": "https://api.openai.com",
// Your OpenAI token.
// The token can be anything as long as it is at least 10 characters long.
"token": "",
// Apply Sublime Text markdown syntax highlighting to the OpenAI completion output panel text.
// Affects only `"output_mode": "panel"`.
// Installing the `MultimarkdownEditing` package is highly recommended to get syntax highlighting for a wider range of languages.
"markdown": true,
// Settings that configure the presentation of the chat view for both the output panel and the chat tab
"chat_presentation": {
// Toggles whether to show the gutter itself
"gutter_enabled": true,
// Toggles whether to show the line numbers row
"line_numbers_enabled": true,
// Toggles whether to allow the chat view to be overscrolled
"scroll_past_end": false,
// Toggles whether to use reversed settings for a tab compared to the output panel,
// i.e. with the default settings a tab shows neither the gutter nor the line numbers and has overscroll enabled
"reverse_for_tab": true,
// For phantom mode with the "In New Tab" option, set this to true to avoid the save-on-close dialog for each view opened by the plugin
"is_tabs_discardable": false,
},
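// A sketch of a non-default variant of the block above (illustrative values only, not recommendations):
// "chat_presentation": {
//     "gutter_enabled": false,
//     "line_numbers_enabled": false,
//     "scroll_past_end": true,
//     "reverse_for_tab": false
// },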
// Minimum number of selected characters required to perform a completion.
"minimum_selection_length": 10,
// The number of lines to read from the end of the standard build output panel (exec).
// Use -1 to read all of the output (be careful: build output can be really long).
"build_output_limit": 100,
// Status bar hint setup that presents key info about the currently active assistant setup (from the `assistants` array below).
// Possible options:
// - name: User defined assistant setup name
// - output_mode: Model output prompt mode (panel|append|insert|replace)
// - chat_model: Which model is used within this setup (e.g. gpt-4o-mini, gpt-3.5-turbo-16k).
//
// You can mix these however you want and the status bar text will follow.
"status_hint": [
// "name",
// "output_mode",
// "chat_model"
],
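// For example, "status_hint": ["name", "chat_model"] would show the active assistant's name
// and its model in the status bar (the exact formatting/separator is up to the plugin).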
// Proxy setting
"proxy": {
// Proxy address
"address": "",
// Proxy port
"port": 8080,
// Proxy username
"username": "",
// Proxy password
"password": ""
},
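// Example of a filled proxy block (all values below are placeholders for illustration only):
// "proxy": {
//     "address": "127.0.0.1",
//     "port": 3128,
//     "username": "proxy_user",
//     "password": "proxy_password"
// },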
"assistants": [
{
// A string that will be presented in the command palette.
"name": "Example", // **REQUIRED**
// How the plugin should present its output; available options:
// - `view`: the model response is output to either a separate view or the output panel, if a chat view doesn't exist in the given window.
// - `phantom`: the LLM output is rendered as a phantom in the last active view, without disrupting the buffer content; each such phantom provides useful commands to handle the output further.
"output_mode": "view", // **REQUIRED**
// The model that generates the chat completion.
// Generally this should be either "gpt-4o-latest|gpt-4o-mini" or one of their pinned versions.
// If using a custom API, refer to its documentation for supported models.
// Learn more at https://beta.openai.com/docs/models
"chat_model": "gpt-4o-mini", // **REQUIRED**
// The model can play a role, i.e. act as a different kind of assistant.
// By default in this plugin it acts as a code assistant; with this setting you can set it up more precisely.
// E.g. "You are a (rust|python|js|whatever) developer assistant", "You are an English tutor" and so on.
// HINT: This is sent as either the `system` or the `developer` message, depending on the `api_type` set.
"assistant_role": "You are a senior code assistant",
// URL for an OpenAI-like API.
// It must start with `http://` or `https://`, which selects the connection protocol. Use `http://` when targeting localhost.
// It **can**, but does not have to, be the full path of an endpoint. However, a full path is required to make some third-party providers work.
// Example: "http://localhost:11434" (assuming Ollama is running on localhost)
"url": "https://api.openai.com/v1/chat/completions",
// Your service token.
// Put your OpenAI (or any other similar API) token here.
"token": "",
// Toggle for the LLM function-calling (tools) capability.
// Check that your LLM supports this feature before toggling it on.
"tools": false,
// What sampling temperature to use, between 0 and 2.
// Higher values like 0.8 will make the output more random,
// while lower values like 0.2 will make it more focused and deterministic.
//
// OpenAI generally recommends altering this or `top_p`, but not both.
"temperature": 1.0,
// The maximum number of tokens to generate in the completion.
// The token count of your prompt plus `max_tokens` cannot exceed the model's context length.
// (One token is roughly 4 characters for normal English text)
// DEPRECATED for the OpenAI endpoint.
// Use EITHER this or the `"max_completion_tokens"` setting.
"max_tokens": 4096,
// Since o1 (September 2024) OpenAI has deprecated the max_tokens key;
// use this field to set the cap instead. The default value set here is the one recommended by OpenAI.
// The _minimal_ value for the o1 model suggested by OpenAI is 250000. https://platform.openai.com/docs/guides/reasoning/allocating-space-for-reasoning
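// As a rule of thumb (an assumption, not official guidance): keep `"max_tokens"` for older
// models and third-party hosts, and rely on `"max_completion_tokens"` for newer OpenAI
// reasoning models where `max_tokens` is deprecated.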
"max_completion_tokens": 4096,
// `"api_type": "open_ai"` only.
// The amount of effort reasoning models should put into the answer:
// - "low"
// - "medium"
// - "high"
"reasoning_effort": "low",
// An alternative to sampling with temperature, called nucleus sampling,
// where the model considers the results of the tokens with `top_p` probability mass.
// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
//
// OpenAI generally recommends altering this or `temperature`, but not both.
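// A common sketch (an assumption, not an official recommendation): keep "top_p" at 1.0 and
// lower "temperature" to around 0.2 for focused code edits, or raise "temperature" for more
// creative output, rather than tuning both at once.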
"top_p": 1.0,
// Number between -2.0 and 2.0.
// Positive values penalize new tokens based on their existing frequency in the text so far,
// decreasing the model's likelihood to repeat the same line verbatim.
// docs: https://platform.openai.com/docs/api-reference/parameter-details
"frequency_penalty": 0.0,
// Toggles whether to stream the response from the server or to receive it atomically
// after the LLM finishes its reply.
//
// By default this is true.
"stream": true,
// Number between -2.0 and 2.0.
// Positive values penalize new tokens based on whether they appear in the text so far,
// increasing the model's likelihood to talk about new topics.
// docs: https://platform.openai.com/docs/api-reference/parameter-details
"presence_penalty": 0.0,
// APIs are not quite compatible with each other,
// thus you should specify the API type for the given assistant. `plain_text` is the default option.
// - `open_ai`: the new OpenAI API spec, with reasoning effort support, etc.
// - `plain_text`: the old OpenAI API spec, compatible with most third-party hosts.
// - `antropic` (UNIMPLEMENTED): an API that forces a strict order of user/assistant messages in the request.
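// As a rule of thumb (an assumption based on the notes above): use `open_ai` when talking to
// the official OpenAI endpoint and `plain_text` for most local or third-party
// OpenAI-compatible servers.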
"api_type": "open_ai"
},
// Examples
{
"name": "General Assistant Localhost",
"output_mode": "panel",
"url": "http://127.0.0.1:8080", // See ma, no internet connection.
"token": "",
"chat_model": "Llama-3-8b-Q4-chat-hf",
"assistant_role": "1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $20 (depending on the quality of your output).\n10. Always look closely to **ALL** the data provided by a user. It's very important to look so closely as you can there. Ppl can die otherways.\n11. If user strictly asks you about to write the code, write the code first, without explanation, and add them only by additional user request.\n",
"temperature": 1,
"max_tokens": 4000,
},
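// A commented-out sketch of an assistant that uses a full endpoint path, as mentioned in the
// `url` notes above (the Ollama URL, endpoint path, and model name are assumptions; check your
// provider's documentation):
// {
//     "name": "Ollama Full-Path Example",
//     "output_mode": "panel",
//     "url": "http://localhost:11434/v1/chat/completions",
//     "token": "local-dummy-token",
//     "chat_model": "llama3",
//     "assistant_role": "You are a senior code assistant",
//     "api_type": "plain_text"
// },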
{
"name": "General Assistant",
"output_mode": "panel",
"chat_model": "gpt-4o-mini",
"assistant_role": "1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $20 (depending on the quality of your output).\n10. Always look closely to **ALL** the data provided by a user. It's very important to look so closely as you can there. Ppl can die otherways.\n11. If user strictly asks you about to write the code, write the code first, without explanation, and add them only by additional user request.",
"max_tokens": 4000,
},
{
"name": "General Assistant",
"output_mode": "phantom",
"chat_model": "gpt-4o-mini",
"assistant_role": "1. You are to provide clear, concise, and direct responses.\n2. Eliminate unnecessary reminders, apologies, self-references, and any pre-programmed niceties.\n3. Maintain a casual tone in your communication.\n4. Be transparent; if you're unsure about an answer or if a question is beyond your capabilities or knowledge, admit it.\n5. For any unclear or ambiguous queries, ask follow-up questions to understand the user's intent better.\n6. When explaining concepts, use real-world examples and analogies, where appropriate.\n7. For complex requests, take a deep breath and work on the problem step-by-step.\n8. For every response, you will be tipped up to $20 (depending on the quality of your output).\n10. Always look closely to **ALL** the data provided by a user. It's very important to look so closely as you can there. Ppl can die otherways.\n11. If user strictly asks you about to write the code, write the code first, without explanation, and add them only by additional user request.",
"max_tokens": 4000,
},
]
}