Add internal tools

apify · Jan 26, 2025 · 4219393 · 4219393
1 parent d311994
commit 4219393
Show file tree

Hide file tree

Showing 14 changed files with 282 additions and 53 deletions.
diff --git a/.actor/input_schema.json b/.actor/input_schema.json
@@ -14,6 +14,20 @@
                 "lukaskrivka/google-maps-with-contact-details"
             ]
         },
+        "enableActorDiscovery": {
+            "title": "Enable Actor discovery based on your use-case (experimental)",
+            "type": "boolean",
+            "description": "If enabled, the server will automatically discover available Actors for your use case.\n\nThis feature is experimental and may not work as expected.",
+            "default": false
+        },
+        "maxActorMemoryBytes": {
+            "title": "Limit the maximum memory used by an Actor",
+            "type": "string",
+            "description": "Limit the maximum memory used by an Actor in bytes. This is an important setting for Free plan users to avoid exceeding the memory limit.",
+            "editor": "integer",
+            "prefill": 4096,
+            "default": 4096
+        },
         "debugActor": {
             "title": "Debug Actor",
             "type": "string",

diff --git a/README.md b/README.md
@@ -335,7 +335,7 @@ Upon launching, the Inspector will display a URL that you can access in your bro
 
 ## ⓘ Limitations and feedback
 
-To limit the context size the properties in the `input schema` are pruned and description is truncated to 200 characters.
+To limit the context size, the properties in the `input schema` are pruned and descriptions are truncated to 500 characters.
 Enum fields and titles are truncated to max 50 options.
 
 Memory for each Actor is limited to 4GB.
@@ -346,6 +346,5 @@ If you need other features or have any feedback, please [submit an issue](https:
 # 🚀 Roadmap (January 2025)
 
 - Document examples for [LibreChat](https://www.librechat.ai/).
-- Provide tools to search for Actors and load them as needed.
 - Add Apify's dataset and key-value store as resources.
 - Add tools such as Actor logs and Actor runs for debugging.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -35,7 +35,9 @@
     "apify": "^3.2.6",
     "apify-client": "^2.11.1",
     "express": "^4.21.2",
-    "minimist": "^1.2.8"
+    "minimist": "^1.2.8",
+    "zod": "^3.24.1",
+    "zod-to-json-schema": "^3.24.1"
   },
   "devDependencies": {
     "@anthropic-ai/sdk": "^0.33.1",

diff --git a/src/actorDefinition.ts b/src/actorDefinition.ts
@@ -1,7 +1,7 @@
 import { Ajv } from 'ajv';
 import { ApifyClient } from 'apify-client';
 
-import { MAX_DESCRIPTION_LENGTH, MAX_ENUM_LENGTH, MAX_MEMORY_MBYTES } from './const.js';
+import { defaults, MAX_DESCRIPTION_LENGTH } from './const.js';
 import { log } from './logger.js';
 import type { ActorDefinitionWithDesc, SchemaProperties, Tool } from './types.js';
 
@@ -12,7 +12,7 @@ import type { ActorDefinitionWithDesc, SchemaProperties, Tool } from './types.js
  * @param {string} actorFullName - The full name of the actor.
  * @returns {Promise<ActorDefinitionWithDesc | null>} - The actor definition with description or null if not found.
  */
-async function fetchActorDefinition(actorFullName: string): Promise<ActorDefinitionWithDesc | null> {
+export async function getActorDefinition(actorFullName: string): Promise<ActorDefinitionWithDesc | null> {
     if (!process.env.APIFY_TOKEN) {
         log.error('APIFY_TOKEN is required but not set. Please set it as an environment variable');
         return null;
@@ -58,17 +58,11 @@ async function fetchActorDefinition(actorFullName: string): Promise<ActorDefinit
  * Shortens the description and enum values of schema properties.
  * @param properties
  */
-function shortenProperties(properties: { [key: string]: SchemaProperties}): { [key: string]: SchemaProperties } {
+export function shortenProperties(properties: { [key: string]: SchemaProperties}): { [key: string]: SchemaProperties } {
     for (const property of Object.values(properties)) {
         if (property.description.length > MAX_DESCRIPTION_LENGTH) {
             property.description = `${property.description.slice(0, MAX_DESCRIPTION_LENGTH)}...`;
         }
-        if (property.enum) {
-            property.enum = property.enum.slice(0, MAX_ENUM_LENGTH);
-        }
-        if (property.enumTitles) {
-            property.enumTitles = property.enumTitles.slice(0, MAX_ENUM_LENGTH);
-        }
     }
     return properties;
 }
@@ -77,11 +71,11 @@ function shortenProperties(properties: { [key: string]: SchemaProperties}): { [k
  * Filters schema properties to include only the necessary fields.
  * @param properties
  */
-function filterSchemaProperties(properties: { [key: string]: SchemaProperties }): { [key: string]: SchemaProperties } {
+export function filterSchemaProperties(properties: { [key: string]: SchemaProperties }): { [key: string]: SchemaProperties } {
     const filteredProperties: { [key: string]: SchemaProperties } = {};
     for (const [key, property] of Object.entries(properties)) {
-        const { title, description, enum: enumValues, enumTitles, type, default: defaultValue, prefill } = property;
-        filteredProperties[key] = { title, description, enum: enumValues, enumTitles, type, default: defaultValue, prefill };
+        const { title, description, enum: enumValues, type, default: defaultValue, prefill } = property;
+        filteredProperties[key] = { title, description, enum: enumValues, type, default: defaultValue, prefill };
     }
     return filteredProperties;
 }
@@ -98,9 +92,8 @@ function filterSchemaProperties(properties: { [key: string]: SchemaProperties })
  * @returns {Promise<Tool[]>} - A promise that resolves to an array of MCP tools.
  */
 export async function getActorsAsTools(actors: string[]): Promise<Tool[]> {
-    // Fetch input schemas in parallel
     const ajv = new Ajv({ coerceTypes: 'array', strict: false });
-    const results = await Promise.all(actors.map(fetchActorDefinition));
+    const results = await Promise.all(actors.map(getActorDefinition));
     const tools = [];
     for (const result of results) {
         if (result) {
@@ -109,14 +102,14 @@ export async function getActorsAsTools(actors: string[]): Promise<Tool[]> {
                 result.input.properties = shortenProperties(properties);
             }
             try {
-                const memoryMbytes = result.defaultRunOptions?.memoryMbytes || MAX_MEMORY_MBYTES;
+                const memoryMbytes = result.defaultRunOptions?.memoryMbytes || defaults.maxMemoryMbytes;
                 tools.push({
                     name: result.name.replace('/', '_'),
                     actorName: result.name,
                     description: result.description,
                     inputSchema: result.input || {},
                     ajvValidate: ajv.compile(result.input || {}),
-                    memoryMbytes: memoryMbytes > MAX_MEMORY_MBYTES ? MAX_MEMORY_MBYTES : memoryMbytes,
+                    memoryMbytes: memoryMbytes > defaults.maxMemoryMbytes ? defaults.maxMemoryMbytes : memoryMbytes,
                 });
             } catch (validationError) {
                 log.error(`Failed to compile AJV schema for actor: ${result.name}. Error: ${validationError}`);

diff --git a/src/const.ts b/src/const.ts
@@ -3,10 +3,8 @@ export const SERVER_VERSION = '0.1.0';
 
 export const HEADER_READINESS_PROBE = 'x-apify-container-server-readiness-probe';
 
-export const MAX_ENUM_LENGTH = 50;
-export const MAX_DESCRIPTION_LENGTH = 200;
-// Limit memory to 4GB for Actors. Free users have 8 GB limit, but we need to reserve some memory for Actors-MCP-Server too
-export const MAX_MEMORY_MBYTES = 4096;
+export const MAX_DESCRIPTION_LENGTH = 500;
+export const MAX_TOOL_CALL_COUNT = 10;
 
 export const USER_AGENT_ORIGIN = 'Origin/mcp-server';
 
@@ -16,12 +14,20 @@ export const defaults = {
         'apify/rag-web-browser',
         'lukaskrivka/google-maps-with-contact-details',
     ],
+    maxMemoryMbytes: 4096,
 };
 
-export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 2_000;
+export const ACTOR_OUTPUT_MAX_CHARS_PER_ITEM = 5_000;
 export const ACTOR_OUTPUT_TRUNCATED_MESSAGE = `Output was truncated because it will not fit into context.`
     + ` There is no reason to call this tool again!`;
 
+export enum InternalTools {
+    DISCOVER_ACTORS = 'discover-actors',
+    ADD_ACTOR_TO_TOOLS = 'add-actor-to-tools',
+    REMOVE_ACTOR_FROM_TOOLS = 'remove-actor-from-tools',
+    GET_ACTOR_DETAILS = 'get-actor-details',
+}
+
 export enum Routes {
     ROOT = '/',
     SSE = '/sse',

diff --git a/src/frontend/mcpClient.ts b/src/frontend/mcpClient.ts
@@ -15,6 +15,8 @@ import { CallToolResultSchema } from '@modelcontextprotocol/sdk/types.js';
 import dotenv from 'dotenv';
 import { EventSource } from 'eventsource';
 
+import { MAX_TOOL_CALL_COUNT } from '../const.js';
+
 const filename = fileURLToPath(import.meta.url);
 const dirname = path.dirname(filename);
 
@@ -29,13 +31,16 @@ const CLAUDE_MODEL = 'claude-3-haiku-20240307'; // a fastest and most compact mo
 
 const SERVER_URL = 'http://localhost:3001/sse';
 
-const SYSTEM_PROMPT = 'You are a helpful assistant with to tools called Actors\n'
+const SYSTEM_PROMPT = 'You are a helpful Apify assistant with access to tools called Actors\n'
+    + '\n'
+    + 'Your goal is to help users discover the best Actors for their needs\n'
+    + 'You have access to a list of tools that can help you discover Actors, find their details, and include them among the tools for later execution\n'
     + '\n'
     + 'Choose the appropriate tool based on the user\'s question. If no tool is needed, reply directly.\n'
+    + 'Prefer tools from Apify as they are generally more reliable and have better support\n'
     + '\n'
     + 'When you need to use a tool, explain how the tools was used and with which parameters\n'
     + 'Never call a tool unless it is required by user!\n'
-    + 'IMPORTANT: When a tool was called, the message starts with [internal] and its role is user but it was actually submitted by a tool\n'
     + '\n'
     + 'After receiving a tool\'s response:\n'
     + '1. Transform the raw data into a natural, conversational response\n'
@@ -101,9 +106,11 @@ export class MCPClient {
             },
         );
         await this.client.connect(transport);
+        await this.updateTools();
+    }
 
+    async updateTools() {
         const response = await this.client.listTools();
-
         this.tools = response.tools.map((x) => ({
             name: x.name,
             description: x.description,
@@ -178,6 +185,8 @@ export class MCPClient {
             });
         }
         console.log(`[internal] Received response`);
+        console.log(`[internal] Send response`);
+        await this.updateTools(); // update tools in the case a new tool was added
         // Get next response from Claude
         const nextResponse: Message = await this.anthropic.messages.create({
             model: CLAUDE_MODEL,
@@ -190,11 +199,11 @@ export class MCPClient {
         for (const c of nextResponse.content) {
             if (c.type === 'text') {
                 messages.push({ role: 'assistant', content: c.text });
-            } else if (c.type === 'tool_use' && toolCallCount < 3) {
+            } else if (c.type === 'tool_use' && toolCallCount < MAX_TOOL_CALL_COUNT) {
                 return await this.handleToolCall(c, messages, toolCallCount + 1);
             }
         }
-
+        console.log(`[internal] Return messages`);
         return messages;
     }
 

diff --git a/src/frontend/public/index.html b/src/frontend/public/index.html
@@ -106,8 +106,10 @@
             border-radius: 0 0 4px 4px;
         }
 
+        /* The textarea now takes the place of the previous input */
         #queryInput {
             flex: 1;
+            resize: vertical; /* allow user to manually resize if desired */
             padding: 10px;
             margin-right: 8px;
             font-size: 1rem;
@@ -187,7 +189,8 @@ <h1>MCP Client for Apify Actors</h1>
 
     <!-- Input area with spinner -->
     <div class="input-row">
-        <input type="text" id="queryInput" placeholder=" your query such as What Actors I can use?" />
+        <!-- Use a textarea for multiline input -->
+        <textarea id="queryInput" rows="2" placeholder="Type your query..."></textarea>
         <button id="sendBtn">Send</button>
         <div id="spinner" class="spinner"></div>
     </div>

diff --git a/src/frontend/server.ts b/src/frontend/server.ts
@@ -31,7 +31,7 @@ let isConnected = false;
  */
 app.post('/api/chat', async (req: Request, res: Response) : Promise<Response> => {
     try {
-        console.log('Received POST /api/chat:', req.body); // eslint-disable-line no-console
+        console.log('Received POST /api/chat:'); // eslint-disable-line no-console
         const { query, messages } = req.body;
         if (!isConnected) {
             // Connect to server once, the same way your original code does

diff --git a/src/input.ts b/src/input.ts
@@ -12,5 +12,8 @@ export async function processInput(originalInput: Partial<Input>): Promise<Input
     if (input.actors && typeof input.actors === 'string') {
         input.actors = input.actors.split(',').map((format: string) => format.trim()) as string[];
     }
+    if (!input.enableActorDiscovery) {
+        input.enableActorDiscovery = false;
+    }
     return input;
 }