diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml
index f0e075848..2e3d948ce 100755
--- a/docker-compose-library.yaml
+++ b/docker-compose-library.yaml
@@ -20,6 +20,7 @@ services:
       - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro
       - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z
       - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface
+      - ./tests/e2e/skills:/app-root/skills:ro,Z
       - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z
       - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z
     environment:
diff --git a/docker-compose.yaml b/docker-compose.yaml
index c3f026e70..aa4631ad6 100755
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -87,6 +87,7 @@ services:
       - "8080:8080"
     volumes:
       - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z
+      - ./tests/e2e/skills:/app-root/skills:ro,z
       - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z
       - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z
     environment:
diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml
new file mode 100644
index 000000000..0aff2f67b
--- /dev/null
+++ b/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml
@@ -0,0 +1,25 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Library mode - embeds llama-stack as library
+  use_as_library_client: true
+  library_client_config_path: run.yaml
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
+skills:
+  paths:
+    - skills
diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml
new file mode 100644
index 000000000..1a7177434
--- /dev/null
+++ b/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml
@@ -0,0 +1,25 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Library mode - embeds llama-stack as library
+  use_as_library_client: true
+  library_client_config_path: run.yaml
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
+skills:
+  paths:
+    - skills/echo
diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml
new file mode 100644
index 000000000..0ae7888c7
--- /dev/null
+++ b/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml
@@ -0,0 +1,26 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Server mode - connects to separate llama-stack service
+  use_as_library_client: false
+  url: http://${env.E2E_LLAMA_HOSTNAME}:8321
+  api_key: xyzzy
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
+skills:
+  paths:
+    - skills
diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml
new file mode 100644
index 000000000..387d03856
--- /dev/null
+++ b/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml
@@ -0,0 +1,26 @@
+name: Lightspeed Core Service (LCS)
+service:
+  host: 0.0.0.0
+  port: 8080
+  auth_enabled: false
+  workers: 1
+  color_log: true
+  access_log: true
+llama_stack:
+  # Server mode - connects to separate llama-stack service
+  use_as_library_client: false
+  url: http://${env.E2E_LLAMA_HOSTNAME}:8321
+  api_key: xyzzy
+user_data_collection:
+  feedback_enabled: true
+  feedback_storage: "/tmp/data/feedback"
+  transcripts_enabled: true
+  transcripts_storage: "/tmp/data/transcripts"
+authentication:
+  module: "noop"
+inference:
+  default_provider: openai
+  default_model: gpt-4o-mini
+skills:
+  paths:
+    - skills/echo
diff --git a/tests/e2e/features/skills.feature b/tests/e2e/features/skills.feature
index c6f01d9cf..b25d12e6d 100644
--- a/tests/e2e/features/skills.feature
+++ b/tests/e2e/features/skills.feature
@@ -1,4 +1,4 @@
-@e2e_group_2 @skip
+@e2e_group_2
 Feature: Agent skills tests
 
   Background:
@@ -9,14 +9,14 @@ Feature: Agent skills tests
 
   # --- Skill tools registration ---
 
-  @SkillsConfig
+  @SkillsConfig @skip
   Scenario: Skill tools are registered when skills are configured
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills.yaml configuration
+    Given The service uses the lightspeed-stack-skills.yaml configuration
+      And MCP toolgroups are reset for a new MCP configuration
       And The service is restarted
     When I access REST API endpoint "tools" using HTTP GET method
     Then The status code of the response is 200
-     And The body of the response is the following    #TODO: Currently placeholder, should reflect actual tools (all tools not just skill tools)
+     And The body of the response is the following
       """
       {
         "tools": [
@@ -56,7 +56,7 @@ Feature: Agent skills tests
             "type": "tool"
           },
           {
-            "identifier": "activate_skill",
+            "identifier": "load_skill",
             "description": "Load full instructions for a skill. Call this when a task matches a skill's description.",
             "parameters": [
               {
@@ -73,7 +73,7 @@ Feature: Agent skills tests
             "type": "tool"
           },
           {
-            "identifier": "load_skill_resource",
+            "identifier": "read_skill_resource",
             "description": "Load a file from a skill's references/ directory. Use this when skill instructions reference additional documentation.",
             "parameters": [
               {
@@ -133,10 +133,11 @@ Feature: Agent skills tests
 
   Scenario: Skill tools are not registered when no skills are configured
     Given The service uses the lightspeed-stack.yaml configuration
+      And MCP toolgroups are reset for a new MCP configuration
       And The service is restarted
     When I access REST API endpoint "tools" using HTTP GET method
     Then The status code of the response is 200
-     And The body of the response is the following    #TODO: Currently placeholder, should reflect actual tools (default tools, not skill tools)
+     And The body of the response is the following
       """
       {
         "tools": [
@@ -166,7 +167,7 @@ Feature: Agent skills tests
             "server_source": "builtin",
             "type": "tool_group"
           }
-        ],
+        ]
       }
       """
 
@@ -174,8 +175,7 @@ Feature: Agent skills tests
 
   @SkillsConfig
   Scenario: LLM can discover skills via list_skills tool using query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "query" to ask question 
@@ -183,16 +183,22 @@ Feature: Agent skills tests
     {"query": "What skills are available? Use the list_skills tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "list_skills",
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "list_skills"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}",
+          "type": "function_call_output"
         }
       ]
       """
@@ -200,8 +206,7 @@ Feature: Agent skills tests
 
   @SkillsConfig
   Scenario: LLM can discover skills via list_skills tool using streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "streaming_query" to ask question 
@@ -211,16 +216,22 @@ Feature: Agent skills tests
     When I wait for the response to be completed
     Then The status code of the response is 200
       And The response is the last streamed fragment
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "list_skills",
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "list_skills"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}",
+          "type": "function_call_output"
         }
       ]
       """
@@ -229,54 +240,70 @@ Feature: Agent skills tests
   # --- Skill activation ---
 
   @SkillsConfig
-  Scenario: LLM can activate a skill and use its instructions via query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  Scenario: LLM can load a skill and use its instructions via query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "query" to ask question 
     """
-    {"query": "I need help with e2e testing. Use the activate_skill tool to load the e2e-test-skill.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Echo 'Hello World'. Use the load_skill tool to load the 'echo' skill.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "load_skill",
+          "args": {
+            "skill_name": "echo"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "activate_skill"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "<skill>\n<name>echo</name>\n<description>Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.</description>\n<uri>/app-root/skills/echo</uri>\n\n<resources>\n<resource name=\"references/guide.md\" />\n</resources>\n\n<scripts>\n<!-- No scripts -->\n</scripts>\n\n<instructions>\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n</instructions>\n</skill>\n",
+          "type": "function_call_output"
         }
       ]
       """
       And The token metrics have increased
 
   @SkillsConfig
-  Scenario: LLM can activate a skill and use its instructions via streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  Scenario: LLM can load a skill and use its instructions via streaming_query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "streaming_query" to ask question 
     """
-    {"query": "I need help with e2e testing. Use the activate_skill tool to load the e2e-test-skill.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Echo 'Hello World'. Use the load_skill tool to load the 'echo' skill.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     When I wait for the response to be completed
     Then The status code of the response is 200
       And The response is the last streamed fragment
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "load_skill",
+          "args": {
+            "skill_name": "echo"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "activate_skill"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "<skill>\n<name>echo</name>\n<description>Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.</description>\n<uri>/app-root/skills/echo</uri>\n\n<resources>\n<resource name=\"references/guide.md\" />\n</resources>\n\n<scripts>\n<!-- No scripts -->\n</scripts>\n\n<instructions>\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n</instructions>\n</skill>\n",
+          "type": "function_call_output"
         }
       ]
       """
@@ -286,53 +313,72 @@ Feature: Agent skills tests
   # --- Skill resource loading ---
 
   @SkillsConfig
-  Scenario: LLM can load a skill reference file via load_skill_resource tool using query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  Scenario: LLM can load a skill reference file via read_skill_resource tool using query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "query" to ask question 
     """
-    {"query": "Load the reference file references/guide.md from the e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load the reference file references/guide.md from the 'echo' skill. Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "read_skill_resource",
+          "args": {
+            "skill_name": "echo",
+            "resource_name": "references/guide.md"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "load_skill_resource"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
-        }      ]
+          "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n",
+          "type": "function_call_output"
+        }
+      ]
       """
       And The token metrics have increased
 
   @SkillsConfig
-  Scenario: LLM can load a skill reference file via load_skill_resource tool using streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  Scenario: LLM can load a skill reference file via read_skill_resource tool using streaming_query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "streaming_query" to ask question 
     """
-    {"query": "Load the reference file references/guide.md from the e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load the reference file references/guide.md from the 'echo' skill. Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     When I wait for the response to be completed
     Then The status code of the response is 200
       And The response is the last streamed fragment
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "read_skill_resource",
+          "args": {
+            "skill_name": "echo",
+            "resource_name": "references/guide.md"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "load_skill_resource"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n",
+          "type": "function_call_output"
         }
       ]
       """
@@ -340,103 +386,134 @@ Feature: Agent skills tests
 
   # --- Error handling: unknown skill ---
 
-  @SkillsConfig
-  Scenario: activate_skill returns error for unknown skill name via query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  @SkillsConfig @skip
+  Scenario: load_skill returns error for unknown skill name via query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
     When I use "query" to ask question 
     """
-    {"query": "Activate a skill called nonexistent-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load a skill called nonexistent-skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following
+      """
+      [
+        {
+          "name": "load_skill",
+          "args": {
+            "skill_name": "nonexistent-skill"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "activate_skill"
           "status": "failure",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "type": "function_call_output"
         }
       ]
       """
 
-  @SkillsConfig
-  Scenario: activate_skill returns error for unknown skill name via streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+
+  @SkillsConfig @skip
+  Scenario: load_skill returns error for unknown skill name via streaming_query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
     When I use "streaming_query" to ask question 
     """
-    {"query": "Activate a skill called nonexistent-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load a skill called nonexistent-skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     When I wait for the response to be completed
     Then The status code of the response is 200
       And The response is the last streamed fragment
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following
+      """
+      [
+        {
+          "name": "load_skill",
+          "args": {
+            "skill_name": "nonexistent-skill"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "activate_skill"
           "status": "failure",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "type": "function_call_output"
         }
       ]
       """
   # --- Error handling: missing resource ---
 
-  @SkillsConfig
-  Scenario: load_skill_resource returns error for nonexistent resource file via query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  @SkillsConfig @skip
+  Scenario: read_skill_resource returns error for nonexistent resource file via query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
     When I use "query" to ask question 
     """
-    {"query": "Load references/nonexistent.md from e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load 'references/nonexistent.md' from the 'echo' skill. Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following
+      """
+      [
+        {
+          "name": "read_skill_resource",
+          "args": {
+            "skill_name": "echo",
+            "resource_name": "references/nonexistent.md"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "load_skill_resource"
           "status": "failure",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "type": "function_call_output"
         }
       ]
       """
 
-  @SkillsConfig
-  Scenario: load_skill_resource returns error for nonexistent resource file via streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  @SkillsConfig @skip
+  Scenario: read_skill_resource returns error for nonexistent resource file via streaming_query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
     When I use "streaming_query" to ask question 
     """
-    {"query": "Load references/nonexistent.md from e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load 'references/nonexistent.md' from the 'echo' skill. Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     When I wait for the response to be completed
     Then The status code of the response is 200
       And The response is the last streamed fragment
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "read_skill_resource",
+          "args": {
+            "skill_name": "echo",
+            "resource_name": "references/nonexistent.md"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "load_skill_resource"
           "status": "failure",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "type": "function_call_output"
         }
       ]
       """
@@ -444,83 +521,101 @@ Feature: Agent skills tests
 
   # --- Context management: deduplication ---
 
-  @SkillsConfig
+  @SkillsConfig @skip
   Scenario: Duplicate skill activation in same conversation returns already-loaded note via query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
     When I use "query" to ask question 
     """
-    {"query": "Activate e2e-test-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load the 'echo' skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
      And I store conversation details
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
-      """
-      [
-        {
-          "id": "<call_id>",
-          "name": "activate_skill"
-          "status": "failure",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
-        }
-      ]
-      """
+    And The body of the "tool_calls" field of the response is the following    
+    """
+    [
+      {
+        "name": "load_skill",
+        "args": {
+          "skill_name": "echo"
+        },
+        "type": "function_call"
+      }
+    ]
+    """
+    And The body of the "tool_results" field of the response is the following    
+    """
+    [
+      {
+        "status": "success",
+        "content": "<skill>\n<name>echo</name>\n<description>Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.</description>\n<uri>/app-root/skills/echo</uri>\n\n<resources>\n<resource name=\"references/guide.md\" />\n</resources>\n\n<scripts>\n<!-- No scripts -->\n</scripts>\n\n<instructions>\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n</instructions>\n</skill>\n",
+        "type": "function_call_output"
+      }
+    ]
+    """
 
     When I use "query" to ask question with same conversation_id
     """
-    {"query": "Activate e2e-test-skill again using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Load the 'echo' skill again using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
-      """
-      [
-        {
-          "id": "<call_id>",
-          "name": "activate_skill"
-          "status": "failure",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
-        }
-      ]
-      """
+    And The body of the "tool_calls" field of the response is the following    
+    """
+    [
+      {
+        "name": "load_skill",
+        "args": {
+          "skill_name": "echo"
+        },
+        "type": "function_call"
+      }
+    ]
+    """
+    And The body of the "tool_results" field of the response is the following    
+    """
+    [
+      {
+        "status": "failure",
+        "type": "function_call_output"
+      }
+    ]
+    """
 
 
   # --- Multiple skills ---
 
   @SkillsMultiConfig
   Scenario: Skills directory path discovers all skills in subdirectories via query endpoint
-    Given The e2e-test-skill skill directory path is "skills/e2e-test-skill"
-      And The e2e-second-skill skill directory path is "skills/e2e-second-skill"
-      And The service uses the lightspeed-stack-skills-directory.yaml configuration
+    Given The service uses the lightspeed-stack-skills-directory.yaml configuration
       And The service is restarted
     When I use "query" to ask question 
     """
     {"query": "List all available skills using the list_skills tool.", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "list_skills",
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "list_skills"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\",\"summarize\":\"Summarize text into a concise single-sentence overview. Use when a user asks to summarize, condense, or shorten text.\"}",
+          "type": "function_call_output"
         }
       ]
       """
 
   @SkillsMultiConfig
   Scenario: Skills directory path discovers all skills in subdirectories via streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "skills/e2e-test-skill"
-      And The e2e-second-skill skill directory path is "skills/e2e-second-skill"
-      And The service uses the lightspeed-stack-skills-directory.yaml configuration
+    Given The service uses the lightspeed-stack-skills-directory.yaml configuration
       And The service is restarted
     When I use "streaming_query" to ask question 
     """
@@ -529,103 +624,143 @@ Feature: Agent skills tests
     When I wait for the response to be completed
     Then The status code of the response is 200
       And The response is the last streamed fragment
-      And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "list_skills",
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "list_skills"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\",\"summarize\":\"Summarize text into a concise single-sentence overview. Use when a user asks to summarize, condense, or shorten text.\"}",
+          "type": "function_call_output"
         }
       ]
       """
 
   # --- Full progressive disclosure flow ---
 
-  @SkillsConfig @flaky
-  Scenario: LLM completes list_skills then activate_skill then load_skill_resource via query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  @SkillsConfig @skip # TODO: This test is too flaky (should be run on demand)
+  Scenario: LLM completes list_skills then load_skill then read_skill_resource via query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "query" to ask question
     """
-    {"query": "Use the echo skill to echo this 'Hello World!'", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Use Skills and follow progressive disclosure. Say 'Hello World'", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     Then The status code of the response is 200
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "list_skills",
+          "type": "function_call"
+        },
+        {
+          "name": "load_skill",
+          "args": {
+            "skill_name": "echo"
+          },
+          "type": "function_call"
+        },
+        {
+          "name": "read_skill_resource",
+          "args": {
+            "skill_name": "echo",
+            "resource_name": "references/guide.md"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "list_skills"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}",
+          "type": "function_call_output",
+          "round": 1
         },
         {
-          "id": "<call_id>",
-          "name": "activate_skill"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "<skill>\n<name>echo</name>\n<description>Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.</description>\n<uri>/app-root/skills/echo</uri>\n\n<resources>\n<resource name=\"references/guide.md\" />\n</resources>\n\n<scripts>\n<!-- No scripts -->\n</scripts>\n\n<instructions>\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n</instructions>\n</skill>\n",
+          "type": "function_call_output",
+          "round": 2
         },
         {
-          "id": "<call_id>",
-          "name": "load_skill_resource"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n",
+          "type": "function_call_output",
+          "round": 3
         }
       ]
       """
 
 
-  @SkillsConfig
-  Scenario: LLM completes list_skills then activate_skill then load_skill_resource via streaming_query endpoint
-    Given The e2e-test-skill skill directory path is "e2e-test-skill"
-      And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration
+  @SkillsConfig @skip # TODO: This test is too flaky (should be run on demand)
+  Scenario: LLM completes list_skills then load_skill then read_skill_resource via streaming_query endpoint
+    Given The service uses the lightspeed-stack-skills.yaml configuration
       And The service is restarted
       And I capture the current token metrics
     When I use "streaming_query" to ask question
     """
-    {"query": "Use the echo skill to echo this 'Hello World!'", "model": "{MODEL}", "provider": "{PROVIDER}"}
+    {"query": "Use Skills and follow progressive disclosure. Say 'Hello World'", "model": "{MODEL}", "provider": "{PROVIDER}"}
     """
     When I wait for the response to be completed
     Then The status code of the response is 200
      And The response is the last streamed fragment
-     And The body of the "tool_results" field is    #TODO: Currently placeholder, should reflect actual tool results
+      And The body of the "tool_calls" field of the response is the following    
+      """
+      [
+        {
+          "name": "list_skills",
+          "type": "function_call"
+        },
+        {
+          "name": "load_skill",
+          "args": {
+            "skill_name": "echo"
+          },
+          "type": "function_call"
+        },
+        {
+          "name": "read_skill_resource",
+          "args": {
+            "skill_name": "echo",
+            "resource_name": "references/guide.md"
+          },
+          "type": "function_call"
+        }
+      ]
+      """
+      And The body of the "tool_results" field of the response is the following    
       """
       [
         {
-          "id": "<call_id>",
-          "name": "list_skills"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}",
+          "type": "function_call_output",
+          "round": 1
         },
         {
-          "id": "<call_id>",
-          "name": "activate_skill"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "<skill>\n<name>echo</name>\n<description>Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.</description>\n<uri>/app-root/skills/echo</uri>\n\n<resources>\n<resource name=\"references/guide.md\" />\n</resources>\n\n<scripts>\n<!-- No scripts -->\n</scripts>\n\n<instructions>\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n</instructions>\n</skill>\n",
+          "type": "function_call_output",
+          "round": 2
         },
         {
-          "id": "<call_id>",
-          "name": "load_skill_resource"
           "status": "success",
-          "content": "<tool_call content>",
-          "type": "tool_result",
-          "round": 1,
+          "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n",
+          "type": "function_call_output",
+          "round": 3
         }
       ]
       """
diff --git a/tests/e2e/features/steps/common_http.py b/tests/e2e/features/steps/common_http.py
index 24369eb7d..d5919d6bd 100644
--- a/tests/e2e/features/steps/common_http.py
+++ b/tests/e2e/features/steps/common_http.py
@@ -305,3 +305,31 @@ def set_header(context: Context, header_name: str) -> None:
         except json.JSONDecodeError:
             pass
     context.auth_headers[header_name] = value
+
+
+@then('The body of the "{field}" field of the response is the following')
+def check_response_field_body(context: Context, field: str) -> None:
+    """Compare one top-level field of the response with the step's JSON text.
+
+    The body is ``context.response_data`` when the context carries a truthy
+    ``use_streaming_response_data`` flag, else ``context.response.json()``.
+    If the step has no text, only the field's presence is asserted; otherwise
+    the value and the parsed text go to ``validate_json_partially``.
+
+    Parameters:
+        context: Behave context with ``response`` and/or ``response_data``.
+        field: Name of the field to check (e.g. ``tool_results``).
+    """
+    streaming = getattr(context, "use_streaming_response_data", False)
+    if not streaming:
+        assert context.response is not None, "Request needs to be performed first"
+    payload = context.response_data if streaming else context.response.json()
+
+    assert field in payload, (
+        f"Field '{field}' not found in response. "
+        f"Available fields: {list(payload.keys())}"
+    )
+    found = payload[field]
+
+    if context.text:
+        validate_json_partially(found, json.loads(context.text))
diff --git a/tests/e2e/features/steps/llm_query_response.py b/tests/e2e/features/steps/llm_query_response.py
index b0f992861..36b899ce0 100644
--- a/tests/e2e/features/steps/llm_query_response.py
+++ b/tests/e2e/features/steps/llm_query_response.py
@@ -364,7 +364,9 @@ def _parse_streaming_response(response_text: str) -> dict:
     lines = response_text.strip().split("\n")
     conversation_id = None
     full_response = ""
-    full_response_split = []
+    full_response_split: list[str] = []
+    tool_calls: list[dict[str, Any]] = []
+    tool_results: list[dict[str, Any]] = []
     finished = False
     stream_error = (
         None  # {"status_code": int, "response": str, "cause": str} if event "error"
@@ -380,6 +382,10 @@ def _parse_streaming_response(response_text: str) -> dict:
                     conversation_id = data["data"]["conversation_id"]
                 elif event == "token":
                     full_response_split.append(data["data"]["token"])
+                elif event == "tool_call":
+                    tool_calls.append(data["data"])
+                elif event == "tool_result":
+                    tool_results.append(data["data"])
                 elif event == "turn_complete":
                     full_response = data["data"]["token"]
                 elif event == "end":
@@ -393,6 +399,23 @@ def _parse_streaming_response(response_text: str) -> dict:
         "conversation_id": conversation_id,
         "response": "".join(full_response_split),
         "response_complete": full_response,
+        "tool_calls": tool_calls,
+        "tool_results": tool_results,
         "finished": finished,
         "stream_error": stream_error,
     }
+
+
+@then("The response is the last streamed fragment")
+def response_is_last_streamed_fragment(context: Context) -> None:
+    """Check that the parsed stream ended and switch field checks to it.
+
+    Requires ``context.response_data`` whose ``finished`` entry is ``True``.
+    Afterwards ``context.use_streaming_response_data`` is set to ``True``
+    and the parsed data is printed.
+    """
+    assert hasattr(context, "response_data"), "Streaming response has not been parsed"
+    parsed = context.response_data
+    assert parsed.get("finished") is True, "Streaming response not finished"
+    context.use_streaming_response_data = True
+    print(parsed)
diff --git a/tests/e2e/skills/echo/SKILL.md b/tests/e2e/skills/echo/SKILL.md
new file mode 100644
index 000000000..e94e52612
--- /dev/null
+++ b/tests/e2e/skills/echo/SKILL.md
@@ -0,0 +1,19 @@
+---
+name: echo
+description: Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.
+---
+
+# Echo Skill
+
+## When to use this skill
+
+Use this skill when:
+- A user asks to echo or repeat text
+- A user wants to verify that the agent can return their input verbatim
+
+## Instructions
+
+1. Read the user's input text
+2. Return the exact text back to the user without modification
+
+For formatting guidelines, see [references/guide.md](references/guide.md).
diff --git a/tests/e2e/skills/echo/references/guide.md b/tests/e2e/skills/echo/references/guide.md
new file mode 100644
index 000000000..dee54b305
--- /dev/null
+++ b/tests/e2e/skills/echo/references/guide.md
@@ -0,0 +1,19 @@
+# Echo Formatting Guide
+
+## Output format
+
+When echoing text back to the user, follow these rules:
+
+- Preserve the exact input text without any modification
+- Do not add quotation marks around the echoed text
+- Do not add any prefix like "Echo:" or "Output:"
+- Return only the echoed text as the response
+- Preserve whitespace and line breaks exactly as provided
+
+## Examples
+
+**Input**: `Hello World!`
+**Output**: `Hello World!`
+
+**Input**: `multiple words with spaces`
+**Output**: `multiple words with spaces`
diff --git a/tests/e2e/skills/summarize/SKILL.md b/tests/e2e/skills/summarize/SKILL.md
new file mode 100644
index 000000000..b9fd63d55
--- /dev/null
+++ b/tests/e2e/skills/summarize/SKILL.md
@@ -0,0 +1,21 @@
+---
+name: summarize
+description: Summarize text into a concise single-sentence overview. Use when a user asks to summarize, condense, or shorten text.
+---
+
+# Summarize Skill
+
+## When to use this skill
+
+Use this skill when:
+- A user asks to summarize or condense text
+- A user wants a brief overview of longer content
+- A user requests a TL;DR or short version
+
+## Instructions
+
+1. Read the user's input text
+2. Identify the key point or main idea
+3. Return a single concise sentence that captures the essence of the input
+
+For formatting guidelines, see [references/guide.md](references/guide.md).
diff --git a/tests/e2e/skills/summarize/references/guide.md b/tests/e2e/skills/summarize/references/guide.md
new file mode 100644
index 000000000..fa2985dd5
--- /dev/null
+++ b/tests/e2e/skills/summarize/references/guide.md
@@ -0,0 +1,20 @@
+# Summarize Formatting Guide
+
+## Output format
+
+When summarizing text for the user, follow these rules:
+
+- Return exactly one sentence
+- Do not add quotation marks around the summary
+- Do not add any prefix like "Summary:" or "TL;DR:"
+- Keep the summary under 30 words
+- Use simple, clear language
+- Preserve the original meaning without adding interpretation
+
+## Examples
+
+**Input**: `The quick brown fox jumped over the lazy dog while the cat watched from the windowsill and the bird flew overhead.`
+**Output**: `A fox jumped over a dog while a cat and bird observed nearby.`
+
+**Input**: `We need to upgrade our database server because the current one is running out of disk space and memory, which causes frequent timeouts during peak hours.`
+**Output**: `The database server needs upgrading due to insufficient disk space and memory causing peak-hour timeouts.`
diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt
index 8d6dd7fa8..6d34ca30d 100644
--- a/tests/e2e/test_list.txt
+++ b/tests/e2e/test_list.txt
@@ -32,3 +32,4 @@ features/tls-ca.feature
 features/tls-mtls.feature
 features/tls-tlsv13.feature
 features/opentelemetry.feature
+features/skills.feature