From 4176424866f49853d4ac0f7ef043dabee8fc82b5 Mon Sep 17 00:00:00 2001 From: gjc199 <1170587540@qq.com> Date: Tue, 16 Jun 2026 11:06:19 +0800 Subject: [PATCH 1/5] 111 --- backend/agents/create_agent_info.py | 9 ++- backend/services/file_management_service.py | 18 ++++-- backend/services/image_service.py | 61 +++++++++++-------- .../services/tool_configuration_service.py | 9 ++- sdk/nexent/core/tools/analyze_audio_tool.py | 7 +++ sdk/nexent/core/tools/analyze_image_tool.py | 7 +++ .../core/tools/analyze_text_file_tool.py | 7 +++ sdk/nexent/core/tools/analyze_video_tool.py | 7 +++ 8 files changed, 91 insertions(+), 34 deletions(-) diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index b8d1ae101..a7a9d37b1 100644 --- a/backend/agents/create_agent_info.py +++ b/backend/agents/create_agent_info.py @@ -646,22 +646,25 @@ async def create_tool_config_list(agent_id, tenant_id, user_id, version_no: int "rerank_model": rerank_model, } elif tool_config.class_name == "AnalyzeTextFileTool": + selected_model_id = param_dict.get("selected_model_id") tool_config.metadata = { - "llm_model": get_llm_model(tenant_id=tenant_id), + "llm_model": get_llm_model(tenant_id=tenant_id, model_id=selected_model_id), "storage_client": minio_client, "data_process_service_url": DATA_PROCESS_SERVICE, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } elif tool_config.class_name == "AnalyzeImageTool": + selected_model_id = param_dict.get("selected_model_id") tool_config.metadata = { # get_vlm_model reads the first multimodal slot, now shown as image understanding. - "vlm_model": get_vlm_model(tenant_id=tenant_id), + "vlm_model": get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id), "storage_client": minio_client, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]: + selected_model_id = param_dict.get("selected_model_id") tool_config.metadata = { - "vlm_model": get_video_understanding_model(tenant_id=tenant_id), + "vlm_model": get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id), "storage_client": minio_client, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py index 389cb0a4e..268e5461e 100644 --- a/backend/services/file_management_service.py +++ b/backend/services/file_management_service.py @@ -33,6 +33,7 @@ list_files, upload_fileobj, ) +from database.model_management_db import get_model_by_model_id from services.vectordatabase_service import ElasticSearchService, get_vector_db_core from utils.config_utils import tenant_config_manager, get_model_name_from_config from utils.file_management_utils import save_upload_file @@ -441,10 +442,17 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None): return files -def get_llm_model(tenant_id: str): - # Get the tenant config - main_model_config = tenant_config_manager.get_model_config( - key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) +def get_llm_model(tenant_id: str, model_id: Optional[int] = None): + if model_id: + main_model_config = get_model_by_model_id(int(model_id), tenant_id) + if not main_model_config: + raise ValueError(f"Model not found: {model_id}") + if main_model_config.get("model_type") != "llm": + raise ValueError(f"Selected model {model_id} is not an LLM model") + else: + # Get the tenant config + main_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) timeout_seconds = main_model_config.get( "timeout_seconds") if main_model_config else None long_text_to_text_model = OpenAILongContextModel( @@ -455,6 +463,8 @@ def get_llm_model(tenant_id: str): max_context_tokens=main_model_config.get("max_tokens"), ssl_verify=main_model_config.get("ssl_verify", True), timeout_seconds=timeout_seconds, + model_factory=main_model_config.get("model_factory"), + display_name=main_model_config.get("display_name"), ) return long_text_to_text_model diff --git a/backend/services/image_service.py b/backend/services/image_service.py index 8a924e9cc..274728311 100644 --- a/backend/services/image_service.py +++ b/backend/services/image_service.py @@ -1,10 +1,12 @@ import logging from http import HTTPStatus +from typing import Optional import aiohttp from consts.const import DATA_PROCESS_SERVICE from consts.const import MODEL_CONFIG_MAPPING +from database.model_management_db import get_model_by_model_id from utils.config_utils import tenant_config_manager, get_model_name_from_config from nexent import MessageObserver @@ -30,14 +32,19 @@ async def proxy_image_impl(decoded_url: str): return result -def get_vlm_model(tenant_id: str): - """Return the configured image understanding model for AnalyzeImageTool. +def _get_model_config_by_id(tenant_id, model_id, expected_model_type): + if not model_id: + return None - The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"] - for compatibility, but it is the user-facing image understanding configuration. - """ - vlm_model_config = tenant_config_manager.get_model_config( - key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) + model_config = get_model_by_model_id(int(model_id), tenant_id) + if not model_config: + raise ValueError(f"Model not found: {model_id}") + if model_config.get("model_type") != expected_model_type: + raise ValueError(f"Selected model {model_id} is not a {expected_model_type} model") + return model_config + + +def _build_vlm_model(vlm_model_config): if not vlm_model_config: return None return OpenAIVLModel( @@ -51,28 +58,34 @@ def get_vlm_model(tenant_id: str): frequency_penalty=0.5, max_tokens=512, ssl_verify=vlm_model_config.get("ssl_verify", True), + model_factory=vlm_model_config.get("model_factory"), + display_name=vlm_model_config.get("display_name"), ) +def get_vlm_model(tenant_id: str, model_id: Optional[int] = None): + """Return the configured image understanding model for AnalyzeImageTool. + + The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"] + for compatibility, but it is the user-facing image understanding configuration. + """ + if model_id: + vlm_model_config = _get_model_config_by_id(tenant_id, model_id, "vlm") + else: + vlm_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) + return _build_vlm_model(vlm_model_config) + + def get_image_understanding_model(tenant_id: str): return get_vlm_model(tenant_id=tenant_id) -def get_video_understanding_model(tenant_id: str): +def get_video_understanding_model(tenant_id: str, model_id: Optional[int] = None): """Return the configured video understanding model for multimodal tools.""" - vlm_model_config = tenant_config_manager.get_model_config( - key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id) - if not vlm_model_config: - return None - return OpenAIVLModel( - observer=MessageObserver(), - model_id=get_model_name_from_config( - vlm_model_config) if vlm_model_config else "", - api_base=vlm_model_config.get("base_url", ""), - api_key=vlm_model_config.get("api_key", ""), - temperature=0.7, - top_p=0.7, - frequency_penalty=0.5, - max_tokens=512, - ssl_verify=vlm_model_config.get("ssl_verify", True), - ) + if model_id: + vlm_model_config = _get_model_config_by_id(tenant_id, model_id, "vlm3") + else: + vlm_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id) + return _build_vlm_model(vlm_model_config) diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py index 08f4896ab..934f55d8a 100644 --- a/backend/services/tool_configuration_service.py +++ b/backend/services/tool_configuration_service.py @@ -810,7 +810,8 @@ def _validate_local_tool( raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") # get_vlm_model reads the first multimodal slot, now shown as image understanding. - image_to_text_model = get_vlm_model(tenant_id=tenant_id) + selected_model_id = instantiation_params.get("selected_model_id") + image_to_text_model = get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id) vlm_display_name = getattr( image_to_text_model, 'display_name', None) set_monitoring_context(tenant_id=tenant_id) @@ -827,7 +828,8 @@ def _validate_local_tool( if not tenant_id or not user_id: raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") - video_understanding_model = get_video_understanding_model(tenant_id=tenant_id) + selected_model_id = instantiation_params.get("selected_model_id") + video_understanding_model = get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id) model_display_name = getattr( video_understanding_model, 'display_name', None) set_monitoring_context(tenant_id=tenant_id) @@ -844,7 +846,8 @@ def _validate_local_tool( if not tenant_id or not user_id: raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") - long_text_to_text_model = get_llm_model(tenant_id=tenant_id) + selected_model_id = instantiation_params.get("selected_model_id") + long_text_to_text_model = get_llm_model(tenant_id=tenant_id, model_id=selected_model_id) llm_display_name = getattr( long_text_to_text_model, 'display_name', None) set_monitoring_context(tenant_id=tenant_id) diff --git a/sdk/nexent/core/tools/analyze_audio_tool.py b/sdk/nexent/core/tools/analyze_audio_tool.py index 1e5439443..282a0b080 100644 --- a/sdk/nexent/core/tools/analyze_audio_tool.py +++ b/sdk/nexent/core/tools/analyze_audio_tool.py @@ -56,6 +56,9 @@ class AnalyzeAudioTool(Tool): init_param_descriptions = { "observer": {"description": "Message observer"}, "vlm_model": {"description": "The video understanding model to use"}, + "selected_model_id": { + "description": "Optional Nexent video understanding model ID to use for audio analysis. If omitted, the default video understanding model is used." + }, "storage_client": {"description": "Storage client for downloading files"}, "validate_url_access": { "description": "Callback function to validate URL access permissions (passed to LoadSaveObjectManager)" @@ -75,6 +78,9 @@ def __init__( description="The video understanding model to use", default=None, exclude=True), + selected_model_id: int = Field( + description="Optional Nexent video understanding model ID to use for audio analysis. If omitted, the default video understanding model is used.", + default=None), storage_client: MinIOStorageClient = Field( description="Storage client for downloading files from S3 URLs, HTTP URLs, and HTTPS URLs.", default=None, @@ -87,6 +93,7 @@ def __init__( super().__init__() self.observer = observer self.vlm_model = vlm_model + self.selected_model_id = selected_model_id self.storage_client = storage_client self._is_chinese = bool(observer and observer.lang == "zh") diff --git a/sdk/nexent/core/tools/analyze_image_tool.py b/sdk/nexent/core/tools/analyze_image_tool.py index f7640a9dc..9368f23fd 100644 --- a/sdk/nexent/core/tools/analyze_image_tool.py +++ b/sdk/nexent/core/tools/analyze_image_tool.py @@ -56,6 +56,9 @@ class AnalyzeImageTool(Tool): "vlm_model": { "description": "The image understanding model to use" }, + "selected_model_id": { + "description": "Optional Nexent image understanding model ID to use for image analysis. If omitted, the default image understanding model is used." + }, "storage_client": { "description": "Storage client for downloading files" }, @@ -77,6 +80,9 @@ def __init__( description="The image understanding model to use", default=None, exclude=True), + selected_model_id: int = Field( + description="Optional Nexent image understanding model ID to use for image analysis. If omitted, the default image understanding model is used.", + default=None), storage_client: MinIOStorageClient = Field( description="Storage client for downloading files from S3 URLs、HTTP URLs、HTTPS URLs.", default=None, @@ -89,6 +95,7 @@ def __init__( super().__init__() self.observer = observer self.vlm_model = vlm_model + self.selected_model_id = selected_model_id self.storage_client = storage_client # Determine if the language is Chinese for internationalization diff --git a/sdk/nexent/core/tools/analyze_text_file_tool.py b/sdk/nexent/core/tools/analyze_text_file_tool.py index 49b9a10ca..89c285af4 100644 --- a/sdk/nexent/core/tools/analyze_text_file_tool.py +++ b/sdk/nexent/core/tools/analyze_text_file_tool.py @@ -57,6 +57,9 @@ class AnalyzeTextFileTool(Tool): "llm_model": { "description": "The LLM model to use" }, + "selected_model_id": { + "description": "Optional Nexent LLM model ID to use for text file analysis. If omitted, the default LLM model is used." + }, "validate_url_access": { "description": "Callback function to validate URL access permissions (passed to LoadSaveObjectManager)" } @@ -85,6 +88,9 @@ def __init__( description="The LLM model to use", default=None, exclude=True), + selected_model_id: int = Field( + description="Optional Nexent LLM model ID to use for text file analysis. If omitted, the default LLM model is used.", + default=None), validate_url_access: callable = Field( description="Callback function to validate URL access permissions", default=None, @@ -94,6 +100,7 @@ def __init__( self.storage_client = storage_client self.observer = observer self.llm_model = llm_model + self.selected_model_id = selected_model_id self.data_process_service_url = data_process_service_url # Create LoadSaveObjectManager with the storage client and validation callback diff --git a/sdk/nexent/core/tools/analyze_video_tool.py b/sdk/nexent/core/tools/analyze_video_tool.py index e7bf84549..cb4c3929b 100644 --- a/sdk/nexent/core/tools/analyze_video_tool.py +++ b/sdk/nexent/core/tools/analyze_video_tool.py @@ -56,6 +56,9 @@ class AnalyzeVideoTool(Tool): init_param_descriptions = { "observer": {"description": "Message observer"}, "vlm_model": {"description": "The video understanding model to use"}, + "selected_model_id": { + "description": "Optional Nexent video understanding model ID to use for video analysis. If omitted, the default video understanding model is used." + }, "storage_client": {"description": "Storage client for downloading files"}, "validate_url_access": { "description": "Callback function to validate URL access permissions (passed to LoadSaveObjectManager)" @@ -75,6 +78,9 @@ def __init__( description="The video understanding model to use", default=None, exclude=True), + selected_model_id: int = Field( + description="Optional Nexent video understanding model ID to use for video analysis. If omitted, the default video understanding model is used.", + default=None), storage_client: MinIOStorageClient = Field( description="Storage client for downloading files from S3 URLs, HTTP URLs, and HTTPS URLs.", default=None, @@ -87,6 +93,7 @@ def __init__( super().__init__() self.observer = observer self.vlm_model = vlm_model + self.selected_model_id = selected_model_id self.storage_client = storage_client self._is_chinese = bool(observer and observer.lang == "zh") From 6fa4536b57ef0d59816b3feeea17af43507da947 Mon Sep 17 00:00:00 2001 From: gjc199 <1170587540@qq.com> Date: Wed, 17 Jun 2026 11:10:36 +0800 Subject: [PATCH 2/5] issue_solve --- backend/agents/create_agent_info.py | 9 ++ backend/services/file_management_service.py | 6 ++ .../agentConfig/tool/ToolConfigModal.tsx | 102 +++++++++++++++++- .../data_process/unstructured_processor.py | 13 ++- .../test_unstructured_processor.py | 3 +- 5 files changed, 125 insertions(+), 8 deletions(-) diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index af813bc96..77f729593 100644 --- a/backend/agents/create_agent_info.py +++ b/backend/agents/create_agent_info.py @@ -510,6 +510,7 @@ async def create_agent_config( system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs) model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id") + model_info = None model_max_tokens = 10000 if model_id_to_use is not None: model_info = get_model_by_model_id(model_id_to_use, tenant_id=tenant_id) @@ -519,6 +520,14 @@ async def create_agent_config( else: model_name = "main_model" + logger.info( + "Agent main LLM: agent_id=%s, model_id=%s, display_name=%s, model_name=%s", + agent_id, + model_id_to_use, + model_info.get("display_name") if model_info else model_name, + model_info.get("model_name") if model_info else model_name, + ) + # Use agent-level setting for context management, default to False. # When ContextManager is disabled, do not attach context_components because # downstream runtime may prefer component-based prompt assembly over the diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py index 268e5461e..b81d495ae 100644 --- a/backend/services/file_management_service.py +++ b/backend/services/file_management_service.py @@ -455,6 +455,12 @@ def get_llm_model(tenant_id: str, model_id: Optional[int] = None): key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) timeout_seconds = main_model_config.get( "timeout_seconds") if main_model_config else None + logger.info( + "Using LLM model for analyze_text_file: model_id=%s, display_name=%s, model_name=%s", + model_id, + main_model_config.get("display_name") if main_model_config else None, + get_model_name_from_config(main_model_config) if main_model_config else None, + ) long_text_to_text_model = OpenAILongContextModel( observer=MessageObserver(), model_id=get_model_name_from_config(main_model_config), diff --git a/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx index a1974ae7e..62ba268e5 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx @@ -34,12 +34,15 @@ import { } from "@/hooks/useKnowledgeBaseConfigChangeHandler"; import { API_ENDPOINTS } from "@/services/api"; import knowledgeBaseService from "@/services/knowledgeBaseService"; +import { modelService } from "@/services/modelService"; import log from "@/lib/logger"; +import { MODEL_TYPES } from "@/const/modelConfig"; import { isEmbeddingModelCompatible as isEmbeddingModelCompatibleBase, isMultimodalConstraintMismatch as isMultimodalConstraintMismatchBase, } from "@/lib/knowledgeBaseCompatibility"; import { isZhLocale, getLocalizedDescription, getKbDisplayName, mapKbIdsToDisplayNames, parseKbIds } from "@/lib/utils"; +import { ModelOption, ModelType } from "@/types/modelConfig"; export interface ToolConfigModalProps { isOpen: boolean; @@ -67,6 +70,24 @@ const TOOLS_SUPPORTING_RERANK = [ "datamate_search", ]; +const ANALYZE_TOOL_MODEL_TYPES: Record = { + analyze_text_file: MODEL_TYPES.LLM, + analyze_image: MODEL_TYPES.VLM, + analyze_audio: MODEL_TYPES.VLM3, + analyze_video: MODEL_TYPES.VLM3, +}; + +const ANALYZE_TOOL_MODEL_DESCRIPTIONS: Record = { + analyze_text_file: + "Optional Nexent LLM model ID to use for text file analysis. If omitted, the default LLM model is used.", + analyze_image: + "Optional Nexent image understanding model ID to use for image analysis. If omitted, the default image understanding model is used.", + analyze_audio: + "Optional Nexent video understanding model ID to use for audio analysis. If omitted, the default video understanding model is used.", + analyze_video: + "Optional Nexent video understanding model ID to use for video analysis. If omitted, the default video understanding model is used.", +}; + function withRerankParams(params: ToolParam[], toolName?: string): ToolParam[] { if (!toolName || !TOOLS_SUPPORTING_RERANK.includes(toolName)) return params; @@ -99,6 +120,38 @@ function withRerankParams(params: ToolParam[], toolName?: string): ToolParam[] { return next; } +function withAnalyzeToolModelParam(params: ToolParam[], toolName?: string): ToolParam[] { + if (!toolName || !ANALYZE_TOOL_MODEL_TYPES[toolName]) return params; + + const normalizedParams = params.map((param) => { + if (param.name !== "selected_model_id") return param; + const value = + param.value === "" || param.value === undefined || param.value === null + ? undefined + : Number(param.value); + return { ...param, value }; + }); + + if (normalizedParams.some((param) => param.name === "selected_model_id")) { + return normalizedParams; + } + + return [ + ...normalizedParams, + { + name: "selected_model_id", + type: "number", + required: false, + value: undefined, + description: ANALYZE_TOOL_MODEL_DESCRIPTIONS[toolName], + }, + ]; +} + +function withExtraToolParams(params: ToolParam[], toolName?: string): ToolParam[] { + return withAnalyzeToolModelParam(withRerankParams(params, toolName), toolName); +} + export default function ToolConfigModal({ isOpen, onCancel, @@ -128,6 +181,29 @@ export default function ToolConfigModal({ // Use React Query for config data const { data: configData } = useConfig(); + const analyzeToolModelType = tool?.name + ? ANALYZE_TOOL_MODEL_TYPES[tool.name] + : undefined; + const isAnalyzeToolWithModelSelection = Boolean(analyzeToolModelType); + const { + data: registeredModels = [], + isFetching: registeredModelsLoading, + } = useQuery({ + queryKey: ["models", "registered", "toolConfig", analyzeToolModelType], + queryFn: () => modelService.getAllModels(), + enabled: isOpen && isAnalyzeToolWithModelSelection, + staleTime: 60_000, + gcTime: 5 * 60_000, + }); + const analyzeToolModelOptions = useMemo(() => { + if (!analyzeToolModelType) return []; + return registeredModels + .filter((model) => model.type === analyzeToolModelType) + .map((model) => ({ + value: model.id, + label: model.displayName || model.name, + })); + }, [registeredModels, analyzeToolModelType]); const [selectedKbDisplayNames, setSelectedKbDisplayNames] = useState< string[] >([]); @@ -672,7 +748,7 @@ export default function ToolConfigModal({ // If server_url already has a saved value, use it if (serverUrlParam?.value) { // Initialize form with saved values (including server_url) - const paramsWithRerank = withRerankParams(initialParams, tool.name); + const paramsWithRerank = withExtraToolParams(initialParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; paramsWithRerank.forEach((param, index) => { @@ -716,7 +792,7 @@ export default function ToolConfigModal({ return param; }); - const paramsWithRerank = withRerankParams(updatedParams, tool.name); + const paramsWithRerank = withExtraToolParams(updatedParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; @@ -726,7 +802,7 @@ export default function ToolConfigModal({ form.setFieldsValue(formValues); } else { // Either no default available OR user has modified the URL, initialize with initialParams - const paramsWithRerank = withRerankParams(initialParams, tool.name); + const paramsWithRerank = withExtraToolParams(initialParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; paramsWithRerank.forEach((param, index) => { @@ -804,7 +880,7 @@ export default function ToolConfigModal({ return param; }); - const paramsWithRerank = withRerankParams(updatedParams, tool.name); + const paramsWithRerank = withExtraToolParams(updatedParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; @@ -844,7 +920,7 @@ export default function ToolConfigModal({ // Initialize form values const paramsWithDefaults = applyInitParamDefaults(initialParams); - const paramsWithRerank = withRerankParams(paramsWithDefaults, tool?.name); + const paramsWithRerank = withExtraToolParams(paramsWithDefaults, tool?.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; paramsWithRerank.forEach((param, index) => { @@ -1453,6 +1529,22 @@ export default function ToolConfigModal({ // Determine if this parameter should be rendered as a select dropdown const isSelectType = options && options.length > 0; + if (param.name === "selected_model_id" && isAnalyzeToolWithModelSelection) { + return ( +