ModelEngine-Group · WMC001 · Jun 22, 2026 · Jun 16, 2026 · Jun 16, 2026 · Jun 17, 2026
@@ -578,6 +578,7 @@ async def create_agent_config(
     system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs)
 
     model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id")
+    model_info = None
     model_max_tokens = 10000
     if model_id_to_use is not None:
         model_info = get_model_by_model_id(model_id_to_use, tenant_id=tenant_id)
@@ -587,6 +588,14 @@ async def create_agent_config(
     else:
         model_name = "main_model"
 
+    logger.info(
+        "Agent main LLM: agent_id=%s, model_id=%s, display_name=%s, model_name=%s",
+        agent_id,
+        model_id_to_use,
+        model_info.get("display_name") if model_info else model_name,
+        model_info.get("model_name") if model_info else model_name,
+    )
+
     # Use agent-level setting for context management, default to False.
     # When ContextManager is disabled, do not attach context_components because
     # downstream runtime may prefer component-based prompt assembly over the
@@ -759,22 +768,25 @@ async def create_tool_config_list(
                 "rerank_model": rerank_model,
             }
         elif tool_config.class_name == "AnalyzeTextFileTool":
+            selected_model_id = param_dict.get("selected_model_id")
             tool_config.metadata = {
-                "llm_model": get_llm_model(tenant_id=tenant_id),
+                "llm_model": get_llm_model(tenant_id=tenant_id, model_id=selected_model_id),
                 "storage_client": minio_client,
                 "data_process_service_url": DATA_PROCESS_SERVICE,
                 "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
             }
         elif tool_config.class_name == "AnalyzeImageTool":
+            selected_model_id = param_dict.get("selected_model_id")
             tool_config.metadata = {
                 # get_vlm_model reads the first multimodal slot, now shown as image understanding.
-                "vlm_model": get_vlm_model(tenant_id=tenant_id),
+                "vlm_model": get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id),
                 "storage_client": minio_client,
                 "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
             }
         elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]:
+            selected_model_id = param_dict.get("selected_model_id")
             tool_config.metadata = {
-                "vlm_model": get_video_understanding_model(tenant_id=tenant_id),
+                "vlm_model": get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id),
                 "storage_client": minio_client,
                 "validate_url_access": lambda urls: validate_urls_access(urls, user_id)
             }

@@ -33,6 +33,7 @@
     list_files,
     upload_fileobj,
 )
+from database.model_management_db import get_model_by_model_id
 from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
 from utils.file_management_utils import save_upload_file
@@ -448,20 +449,39 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
     return files
 
 
-def get_llm_model(tenant_id: str):
-    # Get the tenant config
-    main_model_config = tenant_config_manager.get_model_config(
-        key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
+def get_llm_model(tenant_id: str, model_id: Optional[int] = None):
+    """Build the long-context LLM used for text file analysis.
+
+    Args:
+        tenant_id: Tenant whose model configuration is consulted.
+        model_id: Optional id of an explicitly selected model. When falsy,
+            the tenant-level MODEL_CONFIG_MAPPING["llm"] configuration is
+            used instead. The value is passed through int(), so a numeric
+            string is accepted as well.
+
+    Returns:
+        An OpenAILongContextModel built from the resolved configuration.
+
+    Raises:
+        ValueError: If model_id is not an integer value, no model is found
+            for it, or the model's "model_type" is not "llm".
+    """
+    if model_id:
+        # The id may originate from stored tool parameters, so a malformed
+        # value is reported as ValueError like the other selection failures.
+        try:
+            normalized_model_id = int(model_id)
+        except (TypeError, ValueError) as exc:
+            raise ValueError(f"Invalid model id: {model_id!r}") from exc
+        main_model_config = get_model_by_model_id(normalized_model_id, tenant_id)
+        if not main_model_config:
+            raise ValueError(f"Model not found: {model_id}")
+        if main_model_config.get("model_type") != "llm":
+            raise ValueError(f"Selected model {model_id} is not an LLM model")
+    else:
+        # Get the tenant config
+        main_model_config = tenant_config_manager.get_model_config(
+            key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
     timeout_seconds = main_model_config.get(
         "timeout_seconds") if main_model_config else None
+
+    resolved_model_name = get_model_name_from_config(main_model_config)
+
+    logger.info(
+        "Using LLM model for analyze_text_file: model_id=%s, display_name=%s, model_name=%s",
+        model_id,
+        main_model_config.get("display_name") if main_model_config else None,
+        resolved_model_name
+    )
+
+    # NOTE(review): the lines above tolerate a missing tenant config, but the
+    # .get calls below would fail if it is None -- confirm get_model_config
+    # never returns None.
     long_text_to_text_model = OpenAILongContextModel(
         observer=MessageObserver(),
-        model_id=get_model_name_from_config(main_model_config),
+        model_id=resolved_model_name,
         api_base=main_model_config.get("base_url"),
         api_key=main_model_config.get("api_key"),
         max_context_tokens=main_model_config.get("max_tokens"),
         ssl_verify=main_model_config.get("ssl_verify", True),
         timeout_seconds=timeout_seconds,
+        model_factory=main_model_config.get("model_factory"),
+        display_name=main_model_config.get("display_name"),
     )
     return long_text_to_text_model
 

@@ -3,12 +3,14 @@
 import logging
 import socket
 from http import HTTPStatus
+from typing import Optional
 from urllib.parse import urlparse, urlunparse
 
 import aiohttp
 
 from consts.const import DATA_PROCESS_SERVICE
 from consts.const import MODEL_CONFIG_MAPPING
+from database.model_management_db import get_model_by_model_id
 from utils.config_utils import tenant_config_manager, get_model_name_from_config
 
 from nexent import MessageObserver
@@ -146,14 +148,19 @@ async def proxy_image_impl(decoded_url: str):
             return result
 
 
-def get_vlm_model(tenant_id: str):
-    """Return the configured image understanding model for AnalyzeImageTool.
+def _get_model_config_by_id(tenant_id: str, model_id: Optional[int], expected_model_type: str):
+    """Fetch an explicitly selected model record and verify its type.
+
+    Args:
+        tenant_id: Tenant the lookup is scoped to.
+        model_id: Id of the selected model. Falsy values mean that nothing
+            was selected. The value is passed through int(), so a numeric
+            string is accepted as well.
+        expected_model_type: Value the record's "model_type" must equal.
+
+    Returns:
+        The record returned by get_model_by_model_id, or None when model_id
+        is falsy.
+
+    Raises:
+        ValueError: If model_id is not an integer value, no record is found,
+            or the record's "model_type" differs from expected_model_type.
+    """
+    if not model_id:
+        return None
 
-    The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"]
-    for compatibility, but it is the user-facing image understanding configuration.
-    """
-    vlm_model_config = tenant_config_manager.get_model_config(
-        key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
+    # The id may originate from stored tool parameters, so a malformed value
+    # is reported as ValueError like the other selection failures.
+    try:
+        normalized_model_id = int(model_id)
+    except (TypeError, ValueError) as exc:
+        raise ValueError(f"Invalid model id: {model_id!r}") from exc
+
+    model_config = get_model_by_model_id(normalized_model_id, tenant_id)
+    if not model_config:
+        raise ValueError(f"Model not found: {model_id}")
+    if model_config.get("model_type") != expected_model_type:
+        raise ValueError(f"Selected model {model_id} is not a {expected_model_type} model")
+    return model_config
+
+
+def _build_vlm_model(vlm_model_config):
     if not vlm_model_config:
         return None
     return OpenAIVLModel(
@@ -167,28 +174,34 @@ def get_vlm_model(tenant_id: str):
         frequency_penalty=0.5,
         max_tokens=512,
         ssl_verify=vlm_model_config.get("ssl_verify", True),
+        model_factory=vlm_model_config.get("model_factory"),
+        display_name=vlm_model_config.get("display_name"),
     )
 
 
+def get_vlm_model(tenant_id: str, model_id: Optional[int] = None):
+    """Build the image understanding model used by AnalyzeImageTool.
+
+    An explicitly selected model_id takes precedence. Without one, the first
+    multimodal model slot is read; it is still stored under
+    MODEL_CONFIG_MAPPING["vlm"] for compatibility, but it is the user-facing
+    image understanding configuration.
+
+    Returns whatever _build_vlm_model produces for the resolved configuration,
+    which is None when no configuration is available. A ValueError raised
+    while resolving an explicit model_id is propagated unchanged.
+    """
+    if not model_id:
+        # No explicit selection: fall back to the tenant-level slot.
+        slot_config = tenant_config_manager.get_model_config(
+            key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
+        return _build_vlm_model(slot_config)
+    return _build_vlm_model(_get_model_config_by_id(tenant_id, model_id, "vlm"))
+
+
 def get_image_understanding_model(tenant_id: str):
     return get_vlm_model(tenant_id=tenant_id)
 
 
-def get_video_understanding_model(tenant_id: str):
+def get_video_understanding_model(tenant_id: str, model_id: Optional[int] = None):
     """Return the configured video understanding model for multimodal tools."""
-    vlm_model_config = tenant_config_manager.get_model_config(
-        key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
-    if not vlm_model_config:
-        return None
-    return OpenAIVLModel(
-        observer=MessageObserver(),
-        model_id=get_model_name_from_config(
-            vlm_model_config) if vlm_model_config else "",
-        api_base=vlm_model_config.get("base_url", ""),
-        api_key=vlm_model_config.get("api_key", ""),
-        temperature=0.7,
-        top_p=0.7,
-        frequency_penalty=0.5,
-        max_tokens=512,
-        ssl_verify=vlm_model_config.get("ssl_verify", True),
-    )
+    if not model_id:
+        # No explicit selection: fall back to the tenant-level "vlm3" slot.
+        slot_config = tenant_config_manager.get_model_config(
+            key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
+        return _build_vlm_model(slot_config)
+    # NOTE(review): "vlm3" is used here as the expected model_type as well as
+    # the tenant slot key -- confirm stored model records carry that type value.
+    return _build_vlm_model(_get_model_config_by_id(tenant_id, model_id, "vlm3"))
@@ -133,7 +133,7 @@ def _normalize_northbound_attachments(
     tenant_id: str,
 ) -> Optional[List[Dict[str, Any]]]:
     """Convert northbound attachment references into internal minio_files objects.
-    
+
     Supports two formats:
     1. List of S3 URL strings (backward compatible): ["s3://nexent/...", "/nexent/...", "attachments/..."]
     2. List of attachment objects (full metadata): [{"object_name": "...", "name": "...", ...}]

@@ -815,7 +815,8 @@ def _validate_local_tool(
                 raise ToolExecutionException(
                     f"Tenant ID and User ID are required for {tool_name} validation")
             # get_vlm_model reads the first multimodal slot, now shown as image understanding.
-            image_to_text_model = get_vlm_model(tenant_id=tenant_id)
+            selected_model_id = instantiation_params.get("selected_model_id")
+            image_to_text_model = get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id)
             vlm_display_name = getattr(
                 image_to_text_model, 'display_name', None)
             set_monitoring_context(tenant_id=tenant_id)
@@ -832,7 +833,8 @@ def _validate_local_tool(
             if not tenant_id or not user_id:
                 raise ToolExecutionException(
                     f"Tenant ID and User ID are required for {tool_name} validation")
-            video_understanding_model = get_video_understanding_model(tenant_id=tenant_id)
+            selected_model_id = instantiation_params.get("selected_model_id")
+            video_understanding_model = get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id)
             model_display_name = getattr(
                 video_understanding_model, 'display_name', None)
             set_monitoring_context(tenant_id=tenant_id)
@@ -849,7 +851,8 @@ def _validate_local_tool(
             if not tenant_id or not user_id:
                 raise ToolExecutionException(
                     f"Tenant ID and User ID are required for {tool_name} validation")
-            long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
+            selected_model_id = instantiation_params.get("selected_model_id")
+            long_text_to_text_model = get_llm_model(tenant_id=tenant_id, model_id=selected_model_id)
             llm_display_name = getattr(
                 long_text_to_text_model, 'display_name', None)
             set_monitoring_context(tenant_id=tenant_id)