diff --git a/apps/setting/models_provider/constants/model_provider_constants.py b/apps/setting/models_provider/constants/model_provider_constants.py index c471cead48b..94f4ef3b4eb 100644 --- a/apps/setting/models_provider/constants/model_provider_constants.py +++ b/apps/setting/models_provider/constants/model_provider_constants.py @@ -10,6 +10,7 @@ from setting.models_provider.impl.aliyun_bai_lian_model_provider.aliyun_bai_lian_model_provider import \ AliyunBaiLianModelProvider +from setting.models_provider.impl.anthropic_model_provider.anthropic_model_provider import AnthropicModelProvider from setting.models_provider.impl.aws_bedrock_model_provider.aws_bedrock_model_provider import BedrockModelProvider from setting.models_provider.impl.azure_model_provider.azure_model_provider import AzureModelProvider from setting.models_provider.impl.deepseek_model_provider.deepseek_model_provider import DeepSeekModelProvider @@ -47,3 +48,4 @@ class ModelProvideConstants(Enum): model_xinference_provider = XinferenceModelProvider() model_vllm_provider = VllmModelProvider() aliyun_bai_lian_model_provider = AliyunBaiLianModelProvider() + model_anthropic_provider = AnthropicModelProvider() diff --git a/apps/setting/models_provider/impl/anthropic_model_provider/__init__.py b/apps/setting/models_provider/impl/anthropic_model_provider/__init__.py new file mode 100644 index 00000000000..2dc4ab10db4 --- /dev/null +++ b/apps/setting/models_provider/impl/anthropic_model_provider/__init__.py @@ -0,0 +1,8 @@ +# coding=utf-8 +""" + @project: maxkb + @Author:虎 + @file: __init__.py.py + @date:2024/3/28 16:25 + @desc: +""" diff --git a/apps/setting/models_provider/impl/anthropic_model_provider/anthropic_model_provider.py b/apps/setting/models_provider/impl/anthropic_model_provider/anthropic_model_provider.py new file mode 100644 index 00000000000..7b3f91f1a32 --- /dev/null +++ b/apps/setting/models_provider/impl/anthropic_model_provider/anthropic_model_provider.py @@ -0,0 +1,62 @@ +# 
# Credential forms shared by the model entries registered below.
# NOTE(review): the `openai_` prefix looks like a copy-paste leftover from the
# OpenAI provider; the names are module-level, so they are kept unchanged here.
openai_llm_model_credential = AnthropicLLMModelCredential()
openai_image_model_credential = AnthropicImageModelCredential()

# Claude chat model identifiers, in the order they are registered.
_CLAUDE_LLM_MODEL_NAMES = (
    'claude-3-opus-20240229',
    'claude-3-sonnet-20240229',
    'claude-3-haiku-20240307',
    'claude-3-5-sonnet-20240620',
    'claude-3-5-haiku-20241022',
    'claude-3-5-sonnet-20241022',
)

# One LLM entry per identifier; every entry uses the same credential form and
# the same chat model class, with an empty description.
model_info_list = [
    ModelInfo(llm_name, '', ModelTypeConst.LLM, openai_llm_model_credential, AnthropicChatModel)
    for llm_name in _CLAUDE_LLM_MODEL_NAMES
]

# Image-understanding entries (a single model at the moment).
image_model_info = [
    ModelInfo(
        'claude-3-5-sonnet-20241022',
        '',
        ModelTypeConst.IMAGE,
        openai_image_model_credential,
        AnthropicImage,
    ),
]
# --- anthropic_model_provider/anthropic_model_provider.py ---
class AnthropicModelProvider(IModelProvider):
    """Provider entry point that exposes the Anthropic models to the application."""

    def get_model_info_manage(self):
        """Return the module-level ``model_info_manage`` registry."""
        return model_info_manage

    def get_model_provide_info(self):
        """Return the provider key, display name and icon content.

        The icon is read from ``icon/anthropic_icon_svg`` inside this
        provider's package directory under ``PROJECT_DIR``.
        """
        icon_path = os.path.join(
            PROJECT_DIR, "apps", "setting", 'models_provider', 'impl',
            'anthropic_model_provider', 'icon', 'anthropic_icon_svg')
        return ModelProvideInfo(provider='model_anthropic_provider', name='Anthropic',
                                icon=get_file_content(icon_path))


# --- anthropic_model_provider/credential/image.py ---
class AnthropicImageModelParams(BaseForm):
    """Tunable generation parameters shown for Anthropic image models."""

    # Sampling temperature slider: 0.1 - 1.0 in steps of 0.01, default 0.7.
    temperature = forms.SliderField(TooltipLabel('温度', '较高的数值会使输出更加随机,而较低的数值会使其更加集中和确定'),
                                    required=True, default_value=0.7,
                                    _min=0.1,
                                    _max=1.0,
                                    _step=0.01,
                                    precision=2)

    # Output token limit slider: 1 - 100000 in steps of 1, default 800.
    max_tokens = forms.SliderField(
        TooltipLabel('输出最大Tokens', '指定模型可生成的最大token个数'),
        required=True, default_value=800,
        _min=1,
        _max=100000,
        _step=1,
        precision=0)


class AnthropicImageModelCredential(BaseForm, BaseModelCredential):
    """Credential form (API base URL + API key) for Anthropic image models."""

    api_base = forms.TextInputField('API 域名', required=True)
    api_key = forms.PasswordInputField('API Key', required=True)

    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider,
                 raise_exception=False):
        """Check the credential by streaming a short test prompt through the model.

        :param model_type: model type value; must be one of ``provider.get_model_type_list()``
        :param model_name: name of the model to instantiate
        :param model_credential: mapping that must contain ``api_base`` and ``api_key``
        :param model_params: not used by this check
        :param provider: provider used to list model types and build the model
        :param raise_exception: raise ``AppApiException`` on failure instead of returning ``False``
        :return: ``True`` when the test request succeeds, ``False`` on failure
                 when ``raise_exception`` is false
        :raises AppApiException: always for an unsupported model type; for the
                 other failures only when ``raise_exception`` is true
        """
        model_type_list = provider.get_model_type_list()
        # Test the match itself instead of the truthiness of the matched dicts.
        if not any(mt.get('value') == model_type for mt in model_type_list):
            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')

        for key in ['api_base', 'api_key']:
            if key not in model_credential:
                if raise_exception:
                    raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段')
                else:
                    return False
        try:
            model = provider.get_model(model_type, model_name, model_credential)
            res = model.stream([HumanMessage(content=[{"type": "text", "text": "你好"}])])
            # Drain the stream so the request is actually performed; the chunks
            # themselves are irrelevant and must not be echoed to stdout.
            for _ in res:
                pass
        except AppApiException:
            # Application-level errors are propagated untouched.
            raise
        except Exception as e:
            if raise_exception:
                raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') from e
            return False
        return True

    def encryption_dict(self, model: Dict[str, object]):
        """Return a copy of ``model`` whose ``api_key`` went through ``encryption``."""
        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}

    def get_model_params_setting_form(self, model_name):
        """Return the parameter form; it is the same for every ``model_name``."""
        return AnthropicImageModelParams()
class AnthropicLLMModelCredential(BaseForm, BaseModelCredential):
    """Credential form (API base URL + API key) for Anthropic chat models."""

    api_base = forms.TextInputField('API 域名', required=True)
    api_key = forms.PasswordInputField('API Key', required=True)

    def is_valid(self, model_type: str, model_name, model_credential: Dict[str, object], model_params, provider,
                 raise_exception=False):
        """Check the credential by sending a short test prompt to the model.

        :param model_type: model type value; must be one of ``provider.get_model_type_list()``
        :param model_name: name of the model to instantiate
        :param model_credential: mapping that must contain ``api_base`` and ``api_key``
        :param model_params: not used by this check
        :param provider: provider used to list model types and build the model
        :param raise_exception: raise ``AppApiException`` on failure instead of returning ``False``
        :return: ``True`` when the test request succeeds, ``False`` on failure
                 when ``raise_exception`` is false
        :raises AppApiException: always for an unsupported model type; for the
                 other failures only when ``raise_exception`` is true
        """
        model_type_list = provider.get_model_type_list()
        # Test the match itself instead of the truthiness of the matched dicts.
        if not any(mt.get('value') == model_type for mt in model_type_list):
            raise AppApiException(ValidCode.valid_error.value, f'{model_type} 模型类型不支持')

        for key in ['api_base', 'api_key']:
            if key not in model_credential:
                if raise_exception:
                    raise AppApiException(ValidCode.valid_error.value, f'{key} 字段为必填字段')
                else:
                    return False
        try:
            model = provider.get_model(model_type, model_name, model_credential)
            model.invoke([HumanMessage(content='你好')])
        except AppApiException:
            # Application-level errors are propagated untouched.
            raise
        except Exception as e:
            if raise_exception:
                raise AppApiException(ValidCode.valid_error.value, f'校验失败,请检查参数是否正确: {str(e)}') from e
            return False
        return True

    def encryption_dict(self, model: Dict[str, object]):
        """Return a copy of ``model`` whose ``api_key`` went through ``encryption``."""
        return {**model, 'api_key': super().encryption(model.get('api_key', ''))}

    def get_model_params_setting_form(self, model_name):
        """Return the parameter form; it is the same for every ``model_name``."""
        return AnthropicLLMModelParams()
# --- anthropic_model_provider/model/image.py ---
class AnthropicImage(MaxKBBaseModel, ChatAnthropic):
    """ChatAnthropic wrapper registered for the IMAGE model type."""

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Build a streaming image model from the stored credential.

        ``model_kwargs`` is reduced by ``MaxKBBaseModel.filter_optional_params``
        before being forwarded to the constructor.
        """
        extra_params = MaxKBBaseModel.filter_optional_params(model_kwargs)
        return AnthropicImage(
            model=model_name,
            anthropic_api_url=model_credential.get('api_base'),
            anthropic_api_key=model_credential.get('api_key'),
            streaming=True,
            **extra_params,
        )


# --- anthropic_model_provider/model/llm.py ---
def custom_get_token_ids(text: str):
    """Encode ``text`` with the tokenizer supplied by ``TokenizerManage``."""
    return TokenizerManage.get_tokenizer().encode(text)


class AnthropicChatModel(MaxKBBaseModel, ChatAnthropic):
    """ChatAnthropic wrapper registered for the LLM model type."""

    @staticmethod
    def is_cache_model():
        """Instances of this model are not cached."""
        return False

    @staticmethod
    def new_instance(model_type, model_name, model_credential: Dict[str, object], **model_kwargs):
        """Build a chat model from the stored credential.

        ``model_kwargs`` is reduced by ``MaxKBBaseModel.filter_optional_params``
        before being forwarded to the constructor.
        """
        extra_params = MaxKBBaseModel.filter_optional_params(model_kwargs)
        return AnthropicChatModel(
            model=model_name,
            anthropic_api_url=model_credential.get('api_base'),
            anthropic_api_key=model_credential.get('api_key'),
            **extra_params,
            custom_get_token_ids=custom_get_token_ids
        )

    def get_num_tokens_from_messages(self, messages: List[BaseMessage]) -> int:
        """Count tokens in ``messages``.

        Uses the parent implementation first; if that raises, each message is
        rendered with ``get_buffer_string`` and counted with the tokenizer
        from ``TokenizerManage``.
        """
        try:
            return super().get_num_tokens_from_messages(messages)
        except Exception:
            fallback_tokenizer = TokenizerManage.get_tokenizer()
            return sum(
                len(fallback_tokenizer.encode(get_buffer_string([message])))
                for message in messages
            )

    def get_num_tokens(self, text: str) -> int:
        """Count tokens in ``text``.

        Uses the parent implementation first; if that raises, the text is
        counted with the tokenizer from ``TokenizerManage``.
        """
        try:
            return super().get_num_tokens(text)
        except Exception:
            return len(TokenizerManage.get_tokenizer().encode(text))