diff --git a/backend/agents/create_agent_info.py b/backend/agents/create_agent_info.py index 7e3b42e28..17eb17484 100644 --- a/backend/agents/create_agent_info.py +++ b/backend/agents/create_agent_info.py @@ -578,6 +578,7 @@ async def create_agent_config( system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs) model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id") + model_info = None model_max_tokens = 10000 if model_id_to_use is not None: model_info = get_model_by_model_id(model_id_to_use, tenant_id=tenant_id) @@ -587,6 +588,14 @@ async def create_agent_config( else: model_name = "main_model" + logger.info( + "Agent main LLM: agent_id=%s, model_id=%s, display_name=%s, model_name=%s", + agent_id, + model_id_to_use, + model_info.get("display_name") if model_info else model_name, + model_info.get("model_name") if model_info else model_name, + ) + # Use agent-level setting for context management, default to False. # When ContextManager is disabled, do not attach context_components because # downstream runtime may prefer component-based prompt assembly over the @@ -759,22 +768,25 @@ async def create_tool_config_list( "rerank_model": rerank_model, } elif tool_config.class_name == "AnalyzeTextFileTool": + selected_model_id = param_dict.get("selected_model_id") tool_config.metadata = { - "llm_model": get_llm_model(tenant_id=tenant_id), + "llm_model": get_llm_model(tenant_id=tenant_id, model_id=selected_model_id), "storage_client": minio_client, "data_process_service_url": DATA_PROCESS_SERVICE, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } elif tool_config.class_name == "AnalyzeImageTool": + selected_model_id = param_dict.get("selected_model_id") tool_config.metadata = { # get_vlm_model reads the first multimodal slot, now shown as image understanding. - "vlm_model": get_vlm_model(tenant_id=tenant_id), + "vlm_model": get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id), "storage_client": minio_client, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]: + selected_model_id = param_dict.get("selected_model_id") tool_config.metadata = { - "vlm_model": get_video_understanding_model(tenant_id=tenant_id), + "vlm_model": get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id), "storage_client": minio_client, "validate_url_access": lambda urls: validate_urls_access(urls, user_id) } diff --git a/backend/services/file_management_service.py b/backend/services/file_management_service.py index 585669c0c..64f7ac486 100644 --- a/backend/services/file_management_service.py +++ b/backend/services/file_management_service.py @@ -33,6 +33,7 @@ list_files, upload_fileobj, ) +from database.model_management_db import get_model_by_model_id from services.vectordatabase_service import ElasticSearchService, get_vector_db_core from utils.config_utils import tenant_config_manager, get_model_name_from_config from utils.file_management_utils import save_upload_file @@ -448,20 +449,39 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None): return files -def get_llm_model(tenant_id: str): - # Get the tenant config - main_model_config = tenant_config_manager.get_model_config( - key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) +def get_llm_model(tenant_id: str, model_id: Optional[int] = None): + if model_id: + main_model_config = get_model_by_model_id(int(model_id), tenant_id) + if not main_model_config: + raise ValueError(f"Model not found: {model_id}") + if main_model_config.get("model_type") != "llm": + raise ValueError(f"Selected model {model_id} is not an LLM model") + else: + # Get the tenant config + main_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id) timeout_seconds = main_model_config.get( "timeout_seconds") if main_model_config else None + + resolved_model_name = get_model_name_from_config(main_model_config) + + logger.info( + "Using LLM model for analyze_text_file: model_id=%s, display_name=%s, model_name=%s", + model_id, + main_model_config.get("display_name") if main_model_config else None, + resolved_model_name + ) + long_text_to_text_model = OpenAILongContextModel( observer=MessageObserver(), - model_id=get_model_name_from_config(main_model_config), + model_id=resolved_model_name, api_base=main_model_config.get("base_url"), api_key=main_model_config.get("api_key"), max_context_tokens=main_model_config.get("max_tokens"), ssl_verify=main_model_config.get("ssl_verify", True), timeout_seconds=timeout_seconds, + model_factory=main_model_config.get("model_factory"), + display_name=main_model_config.get("display_name"), ) return long_text_to_text_model diff --git a/backend/services/image_service.py b/backend/services/image_service.py index fdef3b081..76790dc23 100644 --- a/backend/services/image_service.py +++ b/backend/services/image_service.py @@ -3,12 +3,14 @@ import logging import socket from http import HTTPStatus +from typing import Optional from urllib.parse import urlparse, urlunparse import aiohttp from consts.const import DATA_PROCESS_SERVICE from consts.const import MODEL_CONFIG_MAPPING +from database.model_management_db import get_model_by_model_id from utils.config_utils import tenant_config_manager, get_model_name_from_config from nexent import MessageObserver @@ -146,14 +148,19 @@ async def proxy_image_impl(decoded_url: str): return result -def get_vlm_model(tenant_id: str): - """Return the configured image understanding model for AnalyzeImageTool. +def _get_model_config_by_id(tenant_id, model_id, expected_model_type): + if not model_id: + return None - The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"] - for compatibility, but it is the user-facing image understanding configuration. - """ - vlm_model_config = tenant_config_manager.get_model_config( - key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) + model_config = get_model_by_model_id(int(model_id), tenant_id) + if not model_config: + raise ValueError(f"Model not found: {model_id}") + if model_config.get("model_type") != expected_model_type: + raise ValueError(f"Selected model {model_id} is not a {expected_model_type} model") + return model_config + + +def _build_vlm_model(vlm_model_config): if not vlm_model_config: return None return OpenAIVLModel( @@ -167,28 +174,34 @@ def get_vlm_model(tenant_id: str): frequency_penalty=0.5, max_tokens=512, ssl_verify=vlm_model_config.get("ssl_verify", True), + model_factory=vlm_model_config.get("model_factory"), + display_name=vlm_model_config.get("display_name"), ) +def get_vlm_model(tenant_id: str, model_id: Optional[int] = None): + """Return the configured image understanding model for AnalyzeImageTool. + + The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"] + for compatibility, but it is the user-facing image understanding configuration. + """ + if model_id: + vlm_model_config = _get_model_config_by_id(tenant_id, model_id, "vlm") + else: + vlm_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id) + return _build_vlm_model(vlm_model_config) + + def get_image_understanding_model(tenant_id: str): return get_vlm_model(tenant_id=tenant_id) -def get_video_understanding_model(tenant_id: str): +def get_video_understanding_model(tenant_id: str, model_id: Optional[int] = None): """Return the configured video understanding model for multimodal tools.""" - vlm_model_config = tenant_config_manager.get_model_config( - key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id) - if not vlm_model_config: - return None - return OpenAIVLModel( - observer=MessageObserver(), - model_id=get_model_name_from_config( - vlm_model_config) if vlm_model_config else "", - api_base=vlm_model_config.get("base_url", ""), - api_key=vlm_model_config.get("api_key", ""), - temperature=0.7, - top_p=0.7, - frequency_penalty=0.5, - max_tokens=512, - ssl_verify=vlm_model_config.get("ssl_verify", True), - ) + if model_id: + vlm_model_config = _get_model_config_by_id(tenant_id, model_id, "vlm3") + else: + vlm_model_config = tenant_config_manager.get_model_config( + key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id) + return _build_vlm_model(vlm_model_config) diff --git a/backend/services/northbound_service.py b/backend/services/northbound_service.py index c5493a551..a75b92ce0 100644 --- a/backend/services/northbound_service.py +++ b/backend/services/northbound_service.py @@ -133,7 +133,7 @@ def _normalize_northbound_attachments( tenant_id: str, ) -> Optional[List[Dict[str, Any]]]: """Convert northbound attachment references into internal minio_files objects. - + Supports two formats: 1. List of S3 URL strings (backward compatible): ["s3://nexent/...", "/nexent/...", "attachments/..."] 2. List of attachment objects (full metadata): [{"object_name": "...", "name": "...", ...}] diff --git a/backend/services/tool_configuration_service.py b/backend/services/tool_configuration_service.py index 6e6260544..0f5de35c3 100644 --- a/backend/services/tool_configuration_service.py +++ b/backend/services/tool_configuration_service.py @@ -815,7 +815,8 @@ def _validate_local_tool( raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") # get_vlm_model reads the first multimodal slot, now shown as image understanding. - image_to_text_model = get_vlm_model(tenant_id=tenant_id) + selected_model_id = instantiation_params.get("selected_model_id") + image_to_text_model = get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id) vlm_display_name = getattr( image_to_text_model, 'display_name', None) set_monitoring_context(tenant_id=tenant_id) @@ -832,7 +833,8 @@ def _validate_local_tool( if not tenant_id or not user_id: raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") - video_understanding_model = get_video_understanding_model(tenant_id=tenant_id) + selected_model_id = instantiation_params.get("selected_model_id") + video_understanding_model = get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id) model_display_name = getattr( video_understanding_model, 'display_name', None) set_monitoring_context(tenant_id=tenant_id) @@ -849,7 +851,8 @@ def _validate_local_tool( if not tenant_id or not user_id: raise ToolExecutionException( f"Tenant ID and User ID are required for {tool_name} validation") - long_text_to_text_model = get_llm_model(tenant_id=tenant_id) + selected_model_id = instantiation_params.get("selected_model_id") + long_text_to_text_model = get_llm_model(tenant_id=tenant_id, model_id=selected_model_id) llm_display_name = getattr( long_text_to_text_model, 'display_name', None) set_monitoring_context(tenant_id=tenant_id) diff --git a/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx b/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx index fbbf6db78..f249f49aa 100644 --- a/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx +++ b/frontend/app/[locale]/agents/components/agentConfig/tool/ToolConfigModal.tsx @@ -35,12 +35,15 @@ import { } from "@/hooks/useKnowledgeBaseConfigChangeHandler"; import { API_ENDPOINTS } from "@/services/api"; import knowledgeBaseService from "@/services/knowledgeBaseService"; +import { modelService } from "@/services/modelService"; import log from "@/lib/logger"; +import { MODEL_TYPES } from "@/const/modelConfig"; import { isEmbeddingModelCompatible as isEmbeddingModelCompatibleBase, isMultimodalConstraintMismatch as isMultimodalConstraintMismatchBase, } from "@/lib/knowledgeBaseCompatibility"; import { isZhLocale, getLocalizedDescription, getKbDisplayName, mapKbIdsToDisplayNames, parseKbIds } from "@/lib/utils"; +import { ModelOption, ModelType } from "@/types/modelConfig"; export interface ToolConfigModalProps { isOpen: boolean; @@ -69,6 +72,24 @@ const TOOLS_SUPPORTING_RERANK = [ "datamate_search", ]; +const ANALYZE_TOOL_MODEL_TYPES: Record = { + analyze_text_file: MODEL_TYPES.LLM, + analyze_image: MODEL_TYPES.VLM, + analyze_audio: MODEL_TYPES.VLM3, + analyze_video: MODEL_TYPES.VLM3, +}; + +const ANALYZE_TOOL_MODEL_DESCRIPTIONS: Record = { + analyze_text_file: + "Optional Nexent LLM model ID to use for text file analysis. If omitted, the default LLM model is used.", + analyze_image: + "Optional Nexent image understanding model ID to use for image analysis. If omitted, the default image understanding model is used.", + analyze_audio: + "Optional Nexent video understanding model ID to use for audio analysis. If omitted, the default video understanding model is used.", + analyze_video: + "Optional Nexent video understanding model ID to use for video analysis. If omitted, the default video understanding model is used.", +}; + function withRerankParams(params: ToolParam[], toolName?: string): ToolParam[] { if (!toolName || !TOOLS_SUPPORTING_RERANK.includes(toolName)) return params; @@ -101,6 +122,38 @@ function withRerankParams(params: ToolParam[], toolName?: string): ToolParam[] { return next; } +function withAnalyzeToolModelParam(params: ToolParam[], toolName?: string): ToolParam[] { + if (!toolName || !ANALYZE_TOOL_MODEL_TYPES[toolName]) return params; + + const normalizedParams = params.map((param) => { + if (param.name !== "selected_model_id") return param; + const value = + param.value === "" || param.value === undefined || param.value === null + ? undefined + : Number(param.value); + return { ...param, value }; + }); + + if (normalizedParams.some((param) => param.name === "selected_model_id")) { + return normalizedParams; + } + + return [ + ...normalizedParams, + { + name: "selected_model_id", + type: "number", + required: false, + value: undefined, + description: ANALYZE_TOOL_MODEL_DESCRIPTIONS[toolName], + }, + ]; +} + +function withExtraToolParams(params: ToolParam[], toolName?: string): ToolParam[] { + return withAnalyzeToolModelParam(withRerankParams(params, toolName), toolName); +} + export default function ToolConfigModal({ isOpen, onCancel, @@ -131,6 +184,29 @@ export default function ToolConfigModal({ // Use React Query for config data const { data: configData } = useConfig(); + const analyzeToolModelType = tool?.name + ? ANALYZE_TOOL_MODEL_TYPES[tool.name] + : undefined; + const isAnalyzeToolWithModelSelection = Boolean(analyzeToolModelType); + const { + data: registeredModels = [], + isFetching: registeredModelsLoading, + } = useQuery({ + queryKey: ["models", "registered", "toolConfig", analyzeToolModelType], + queryFn: () => modelService.getAllModels(), + enabled: isOpen && isAnalyzeToolWithModelSelection, + staleTime: 60_000, + gcTime: 5 * 60_000, + }); + const analyzeToolModelOptions = useMemo(() => { + if (!analyzeToolModelType) return []; + return registeredModels + .filter((model) => model.type === analyzeToolModelType) + .map((model) => ({ + value: model.id, + label: model.displayName || model.name, + })); + }, [registeredModels, analyzeToolModelType]); const [selectedKbDisplayNames, setSelectedKbDisplayNames] = useState< string[] >([]); @@ -720,7 +796,7 @@ export default function ToolConfigModal({ // If server_url already has a saved value, use it if (serverUrlParam?.value) { // Initialize form with saved values (including server_url) - const paramsWithRerank = withRerankParams(initialParams, tool.name); + const paramsWithRerank = withExtraToolParams(initialParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; paramsWithRerank.forEach((param, index) => { @@ -767,7 +843,7 @@ export default function ToolConfigModal({ return param; }); - const paramsWithRerank = withRerankParams(updatedParams, tool.name); + const paramsWithRerank = withExtraToolParams(updatedParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; @@ -777,7 +853,7 @@ export default function ToolConfigModal({ form.setFieldsValue(formValues); } else { // Either no default available OR user has modified the URL, initialize with initialParams - const paramsWithRerank = withRerankParams(initialParams, tool.name); + const paramsWithRerank = withExtraToolParams(initialParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; paramsWithRerank.forEach((param, index) => { @@ -858,7 +934,7 @@ export default function ToolConfigModal({ return param; }); - const paramsWithRerank = withRerankParams(updatedParams, tool.name); + const paramsWithRerank = withExtraToolParams(updatedParams, tool.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; @@ -910,7 +986,7 @@ export default function ToolConfigModal({ // Initialize form values const paramsWithDefaults = applyInitParamDefaults(initialParams); const paramsMigrated = migrateAidpParamNames(paramsWithDefaults); - const paramsWithRerank = withRerankParams(paramsMigrated, tool?.name); + const paramsWithRerank = withExtraToolParams(paramsMigrated, tool?.name); setCurrentParams(paramsWithRerank); const formValues: Record = {}; paramsWithRerank.forEach((param, index) => { @@ -1540,6 +1616,22 @@ export default function ToolConfigModal({ // Determine if this parameter should be rendered as a select dropdown const isSelectType = options && options.length > 0; + if (param.name === "selected_model_id" && isAnalyzeToolWithModelSelection) { + return ( +