diff --git a/docker-compose-library.yaml b/docker-compose-library.yaml index f0e075848..2e3d948ce 100755 --- a/docker-compose-library.yaml +++ b/docker-compose-library.yaml @@ -20,6 +20,7 @@ services: - ${GCP_KEYS_PATH:-./tmp/.gcp-keys-dummy}:/opt/app-root/.gcp-keys:ro - ./tests/e2e/rag:/opt/app-root/src/.llama/storage/rag:Z - ${HF_CACHE_PATH:-./tmp/.hf-cache}:/opt/app-root/src/.cache/huggingface + - ./tests/e2e/skills:/app-root/skills:ro,Z - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z environment: diff --git a/docker-compose.yaml b/docker-compose.yaml index c3f026e70..aa4631ad6 100755 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -87,6 +87,7 @@ services: - "8080:8080" volumes: - ./lightspeed-stack.yaml:/app-root/lightspeed-stack.yaml:z + - ./tests/e2e/skills:/app-root/skills:ro,z - ./tests/e2e/secrets/mcp-token:/tmp/mcp-token:ro,z - ./tests/e2e/secrets/invalid-mcp-token:/tmp/invalid-mcp-token:ro,z environment: diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml b/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml new file mode 100644 index 000000000..0aff2f67b --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-skills-directory.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills diff --git a/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml 
b/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml new file mode 100644 index 000000000..1a7177434 --- /dev/null +++ b/tests/e2e/configuration/library-mode/lightspeed-stack-skills.yaml @@ -0,0 +1,25 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Library mode - embeds llama-stack as library + use_as_library_client: true + library_client_config_path: run.yaml +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills/echo diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml new file mode 100644 index 000000000..0ae7888c7 --- /dev/null +++ b/tests/e2e/configuration/server-mode/lightspeed-stack-skills-directory.yaml @@ -0,0 +1,26 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://${env.E2E_LLAMA_HOSTNAME}:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills diff --git a/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml b/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml new file mode 100644 index 000000000..387d03856 --- /dev/null +++ 
b/tests/e2e/configuration/server-mode/lightspeed-stack-skills.yaml @@ -0,0 +1,26 @@ +name: Lightspeed Core Service (LCS) +service: + host: 0.0.0.0 + port: 8080 + auth_enabled: false + workers: 1 + color_log: true + access_log: true +llama_stack: + # Server mode - connects to separate llama-stack service + use_as_library_client: false + url: http://${env.E2E_LLAMA_HOSTNAME}:8321 + api_key: xyzzy +user_data_collection: + feedback_enabled: true + feedback_storage: "/tmp/data/feedback" + transcripts_enabled: true + transcripts_storage: "/tmp/data/transcripts" +authentication: + module: "noop" +inference: + default_provider: openai + default_model: gpt-4o-mini +skills: + paths: + - skills/echo diff --git a/tests/e2e/features/skills.feature b/tests/e2e/features/skills.feature index c6f01d9cf..b25d12e6d 100644 --- a/tests/e2e/features/skills.feature +++ b/tests/e2e/features/skills.feature @@ -1,4 +1,4 @@ -@e2e_group_2 @skip +@e2e_group_2 Feature: Agent skills tests Background: @@ -9,14 +9,14 @@ Feature: Agent skills tests # --- Skill tools registration --- - @SkillsConfig + @SkillsConfig @skip Scenario: Skill tools are registered when skills are configured - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration + And MCP toolgroups are reset for a new MCP configuration And The service is restarted When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 200 - And The body of the response is the following #TODO: Currently placeholder, should reflect actual tools (all tools not just skill tools) + And The body of the response is the following """ { "tools": [ @@ -56,7 +56,7 @@ Feature: Agent skills tests "type": "tool" }, { - "identifier": "activate_skill", + "identifier": "load_skill", "description": "Load full instructions for a skill. 
Call this when a task matches a skill's description.", "parameters": [ { @@ -73,7 +73,7 @@ Feature: Agent skills tests "type": "tool" }, { - "identifier": "load_skill_resource", + "identifier": "read_skill_resource", "description": "Load a file from a skill's references/ directory. Use this when skill instructions reference additional documentation.", "parameters": [ { @@ -133,10 +133,11 @@ Feature: Agent skills tests Scenario: Skill tools are not registered when no skills are configured Given The service uses the lightspeed-stack.yaml configuration + And MCP toolgroups are reset for a new MCP configuration And The service is restarted When I access REST API endpoint "tools" using HTTP GET method Then The status code of the response is 200 - And The body of the response is the following #TODO: Currently placeholder, should reflect actual tools (default tools, not skill tools) + And The body of the response is the following """ { "tools": [ @@ -166,7 +167,7 @@ Feature: Agent skills tests "server_source": "builtin", "type": "tool_group" } - ], + ] } """ @@ -174,8 +175,7 @@ Feature: Agent skills tests @SkillsConfig Scenario: LLM can discover skills via list_skills tool using query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question @@ -183,16 +183,22 @@ Feature: Agent skills tests {"query": "What skills are available? 
Use the list_skills tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output" } ] """ @@ -200,8 +206,7 @@ Feature: Agent skills tests @SkillsConfig Scenario: LLM can discover skills via list_skills tool using streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question @@ -211,16 +216,22 @@ Feature: Agent skills tests When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. 
Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output" } ] """ @@ -229,54 +240,70 @@ Feature: Agent skills tests # --- Skill activation --- @SkillsConfig - Scenario: LLM can activate a skill and use its instructions via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill and use its instructions via query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question """ - {"query": "I need help with e2e testing. Use the activate_skill tool to load the e2e-test-skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Echo 'Hello World'. Use the load_skill tool to load the 'echo' skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. 
Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output" } ] """ And The token metrics have increased @SkillsConfig - Scenario: LLM can activate a skill and use its instructions via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill and use its instructions via streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question """ - {"query": "I need help with e2e testing. Use the activate_skill tool to load the e2e-test-skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Echo 'Hello World'. Use the load_skill tool to load the 'echo' skill.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. 
Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output" } ] """ @@ -286,53 +313,72 @@ Feature: Agent skills tests # --- Skill resource loading --- @SkillsConfig - Scenario: LLM can load a skill reference file via load_skill_resource tool using query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill reference file via read_skill_resource tool using query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question """ - {"query": "Load the reference file references/guide.md from the e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the reference file references/guide.md from the 'echo' skill. 
Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, - } ] + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output" + } + ] """ And The token metrics have increased @SkillsConfig - Scenario: LLM can load a skill reference file via load_skill_resource tool using streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Scenario: LLM can load a skill reference file via read_skill_resource tool using streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question """ - {"query": "Load the reference file references/guide.md from the e2e-test-skill using 
load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the reference file references/guide.md from the 'echo' skill. Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output" } ] """ @@ -340,103 +386,134 @@ Feature: Agent skills tests # --- Error handling: unknown skill --- - @SkillsConfig - Scenario: activate_skill returns error for unknown skill name via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip + Scenario: load_skill returns error for unknown skill name via query endpoint + Given The service uses the 
lightspeed-stack-skills.yaml configuration And The service is restarted When I use "query" to ask question """ - {"query": "Activate a skill called nonexistent-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "load a skill called nonexistent-skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "nonexistent-skill" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ - @SkillsConfig - Scenario: activate_skill returns error for unknown skill name via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + + @SkillsConfig @skip + Scenario: load_skill returns error for unknown skill name via streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "streaming_query" to ask question """ - {"query": "Activate a skill called nonexistent-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load a skill called nonexistent-skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual 
tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "nonexistent-skill" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "activate_skill" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ # --- Error handling: missing resource --- - @SkillsConfig - Scenario: load_skill_resource returns error for nonexistent resource file via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip + Scenario: read_skill_resource returns error for nonexistent resource file via query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "query" to ask question """ - {"query": "Load references/nonexistent.md from e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load 'references/nonexistent.md' from the 'echo' skill. 
Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/nonexistent.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ - @SkillsConfig - Scenario: load_skill_resource returns error for nonexistent resource file via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip + Scenario: read_skill_resource returns error for nonexistent resource file via streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "streaming_query" to ask question """ - {"query": "Load references/nonexistent.md from e2e-test-skill using load_skill_resource.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load 'references/nonexistent.md' from the 'echo' skill. 
Use the read_skill_resource tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/nonexistent.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "load_skill_resource" "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, + "type": "function_call_output" } ] """ @@ -444,83 +521,101 @@ Feature: Agent skills tests # --- Context management: deduplication --- - @SkillsConfig + @SkillsConfig @skip Scenario: Duplicate skill activation in same conversation returns already-loaded note via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted When I use "query" to ask question """ - {"query": "Activate e2e-test-skill using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the 'echo' skill using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 And I store conversation details - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results - """ - [ - { - "id": "", - "name": "activate_skill" - "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, - } - ] - """ + And The body of the "tool_calls" field of the response is the following + 
""" + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following + """ + [ + { + "status": "success", + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output" + } + ] + """ When I use "query" to ask question with same conversation_id """ - {"query": "Activate e2e-test-skill again using the activate_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Load the 'echo' skill again using the load_skill tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results - """ - [ - { - "id": "", - "name": "activate_skill" - "status": "failure", - "content": "", - "type": "tool_result", - "round": 1, - } - ] - """ + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following + """ + [ + { + "status": "failure", + "type": "function_call_output" + } + ] + """ # --- Multiple skills --- @SkillsMultiConfig Scenario: Skills directory path discovers all skills in subdirectories via query endpoint - Given The e2e-test-skill skill directory path is "skills/e2e-test-skill" - And 
The e2e-second-skill skill directory path is "skills/e2e-second-skill" - And The service uses the lightspeed-stack-skills-directory.yaml configuration + Given The service uses the lightspeed-stack-skills-directory.yaml configuration And The service is restarted When I use "query" to ask question """ {"query": "List all available skills using the list_skills tool.", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\",\"summarize\":\"Summarize text into a concise single-sentence overview. 
Use when a user asks to summarize, condense, or shorten text.\"}", + "type": "function_call_output" } ] """ @SkillsMultiConfig Scenario: Skills directory path discovers all skills in subdirectories via streaming_query endpoint - Given The e2e-test-skill skill directory path is "skills/e2e-test-skill" - And The e2e-second-skill skill directory path is "skills/e2e-second-skill" - And The service uses the lightspeed-stack-skills-directory.yaml configuration + Given The service uses the lightspeed-stack-skills-directory.yaml configuration And The service is restarted When I use "streaming_query" to ask question """ @@ -529,103 +624,143 @@ Feature: Agent skills tests When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\",\"summarize\":\"Summarize text into a concise single-sentence overview. 
Use when a user asks to summarize, condense, or shorten text.\"}", + "type": "function_call_output" } ] """ # --- Full progressive disclosure flow --- - @SkillsConfig @flaky - Scenario: LLM completes list_skills then activate_skill then load_skill_resource via query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip # TODO: This test is too flaky (should be run on demand) + Scenario: LLM completes list_skills then load_skill then read_skill_resource via query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "query" to ask question """ - {"query": "Use the echo skill to echo this 'Hello World!'", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Use Skills and follow progressive disclosure. Say 'Hello World'", "model": "{MODEL}", "provider": "{PROVIDER}"} """ Then The status code of the response is 200 - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + }, + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + }, + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. 
Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output", + "round": 1 }, { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n", + "type": "function_call_output", + "round": 2 }, { - "id": "", - "name": "load_skill_resource" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n", + "type": "function_call_output", + "round": 3 } ] """ - @SkillsConfig - Scenario: LLM completes list_skills then activate_skill then load_skill_resource via streaming_query endpoint - Given The e2e-test-skill skill directory path is "e2e-test-skill" - And The service uses the lightspeed-stack-skills-auth-noop-token.yaml configuration + @SkillsConfig @skip # TODO: This test is too flaky (should be run on demand) + Scenario: LLM completes list_skills then load_skill then read_skill_resource via 
streaming_query endpoint + Given The service uses the lightspeed-stack-skills.yaml configuration And The service is restarted And I capture the current token metrics When I use "streaming_query" to ask question """ - {"query": "Use the echo skill to echo this 'Hello World!'", "model": "{MODEL}", "provider": "{PROVIDER}"} + {"query": "Use Skills and follow progressive disclosure. Say 'Hello World'", "model": "{MODEL}", "provider": "{PROVIDER}"} """ When I wait for the response to be completed Then The status code of the response is 200 And The response is the last streamed fragment - And The body of the "tool_results" field is #TODO: Currently placeholder, should reflect actual tool results + And The body of the "tool_calls" field of the response is the following + """ + [ + { + "name": "list_skills", + "type": "function_call" + }, + { + "name": "load_skill", + "args": { + "skill_name": "echo" + }, + "type": "function_call" + }, + { + "name": "read_skill_resource", + "args": { + "skill_name": "echo", + "resource_name": "references/guide.md" + }, + "type": "function_call" + } + ] + """ + And The body of the "tool_results" field of the response is the following """ [ { - "id": "", - "name": "list_skills" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "{\"echo\":\"Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\"}", + "type": "function_call_output", + "round": 1 }, { - "id": "", - "name": "activate_skill" "status": "success", - "content": "", - "type": "tool_result", - "round": 1, + "content": "\necho\nEcho back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.\n/app-root/skills/echo\n\n\n\n\n\n\n\n\n\n\n# Echo Skill\n\n## When to use this skill\n\nUse this skill when:\n- A user asks to echo or repeat text\n- A user wants to verify that the agent can return their input verbatim\n\n## Instructions\n\n1. Read the user's input text\n2. 
Return the exact text back to the user without modification\n\nFor formatting guidelines, see [references/guide.md](references/guide.md).\n\n\n",
+            "type": "function_call_output",
+            "round": 2
           },
           {
-            "id": "",
-            "name": "load_skill_resource"
             "status": "success",
-            "content": "",
-            "type": "tool_result",
-            "round": 1,
+            "content": "# Echo Formatting Guide\n\n## Output format\n\nWhen echoing text back to the user, follow these rules:\n\n- Preserve the exact input text without any modification\n- Do not add quotation marks around the echoed text\n- Do not add any prefix like \"Echo:\" or \"Output:\"\n- Return only the echoed text as the response\n- Preserve whitespace and line breaks exactly as provided\n\n## Examples\n\n**Input**: `Hello World!`\n**Output**: `Hello World!`\n\n**Input**: `multiple words with spaces`\n**Output**: `multiple words with spaces`\n",
+            "type": "function_call_output",
+            "round": 3
           }
         ]
       """
diff --git a/tests/e2e/features/steps/common_http.py b/tests/e2e/features/steps/common_http.py
index 24369eb7d..d5919d6bd 100644
--- a/tests/e2e/features/steps/common_http.py
+++ b/tests/e2e/features/steps/common_http.py
@@ -305,3 +305,42 @@ def set_header(context: Context, header_name: str) -> None:
     except json.JSONDecodeError:
         pass
     context.auth_headers[header_name] = value
+
+
+@then('The body of the "{field}" field of the response is the following')
+def check_response_field_body(context: Context, field: str) -> None:
+    """Check one top-level field of the response body against expected JSON.
+
+    The body is taken from ``context.response_data`` when
+    ``context.use_streaming_response_data`` is set and truthy; otherwise it is
+    decoded from ``context.response``. When the step carries text, the text is
+    parsed as JSON and handed to ``validate_json_partially`` together with the
+    field value; with no text only the presence of the field is checked.
+
+    Parameters:
+        context: Behave context with ``response`` and/or ``response_data``.
+        field: Name of the field to check (e.g. ``tool_results``).
+    """
+    if getattr(context, "use_streaming_response_data", False):
+        response_body = context.response_data
+    else:
+        assert context.response is not None, "Request needs to be performed first"
+        response_body = context.response.json()
+
+    # A non-object body (JSON list, string, ...) would make the membership
+    # test below misleading and ``.keys()`` in its message raise AttributeError.
+    assert isinstance(response_body, dict), (
+        f"Response body is not a JSON object: {type(response_body).__name__}"
+    )
+    assert field in response_body, (
+        f"Field '{field}' not found in response. "
+        f"Available fields: {list(response_body.keys())}"
+    )
+
+    actual_value = response_body[field]
+
+    if not context.text:
+        return
+
+    expected_value = json.loads(context.text)
+    validate_json_partially(actual_value, expected_value)
diff --git a/tests/e2e/features/steps/llm_query_response.py b/tests/e2e/features/steps/llm_query_response.py
index b0f992861..36b899ce0 100644
--- a/tests/e2e/features/steps/llm_query_response.py
+++ b/tests/e2e/features/steps/llm_query_response.py
@@ -364,7 +364,9 @@ def _parse_streaming_response(response_text: str) -> dict:
     lines = response_text.strip().split("\n")
     conversation_id = None
     full_response = ""
-    full_response_split = []
+    full_response_split: list[str] = []
+    tool_calls: list[dict[str, Any]] = []
+    tool_results: list[dict[str, Any]] = []
     finished = False
     stream_error = (
         None  # {"status_code": int, "response": str, "cause": str} if event "error"
@@ -380,6 +382,10 @@ def _parse_streaming_response(response_text: str) -> dict:
                 conversation_id = data["data"]["conversation_id"]
             elif event == "token":
                 full_response_split.append(data["data"]["token"])
+            elif event == "tool_call":
+                tool_calls.append(data["data"])
+            elif event == "tool_result":
+                tool_results.append(data["data"])
             elif event == "turn_complete":
                 full_response = data["data"]["token"]
             elif event == "end":
@@ -393,6 +399,24 @@ def _parse_streaming_response(response_text: str) -> dict:
         "conversation_id": conversation_id,
         "response": "".join(full_response_split),
         "response_complete": full_response,
+        "tool_calls": tool_calls,
+        "tool_results": tool_results,
         "finished": finished,
         "stream_error": stream_error,
     }
+
+
+@then("The response is the last streamed fragment")
+def response_is_last_streamed_fragment(context: Context) -> None:
+    """Assert streaming finished and flag context for field checks.
+
+    Sets ``context.use_streaming_response_data`` so subsequent steps
+    read from ``context.response_data`` instead of the raw HTTP JSON.
+    """
+    assert hasattr(context, "response_data"), "Streaming response has not been parsed"
+    assert (
+        context.response_data.get("finished") is True
+    ), "Streaming response not finished"
+    context.use_streaming_response_data = True
+    # NOTE(review): presumably a debugging aid for failed runs - confirm it is wanted.
+    print(context.response_data)
diff --git a/tests/e2e/skills/echo/SKILL.md b/tests/e2e/skills/echo/SKILL.md
new file mode 100644
index 000000000..e94e52612
--- /dev/null
+++ b/tests/e2e/skills/echo/SKILL.md
@@ -0,0 +1,19 @@
+---
+name: echo
+description: Echo back the user's input exactly as provided. Use when a user asks to echo, repeat, or mirror text.
+---
+
+# Echo Skill
+
+## When to use this skill
+
+Use this skill when:
+- A user asks to echo or repeat text
+- A user wants to verify that the agent can return their input verbatim
+
+## Instructions
+
+1. Read the user's input text
+2. Return the exact text back to the user without modification
+
+For formatting guidelines, see [references/guide.md](references/guide.md).
diff --git a/tests/e2e/skills/echo/references/guide.md b/tests/e2e/skills/echo/references/guide.md
new file mode 100644
index 000000000..dee54b305
--- /dev/null
+++ b/tests/e2e/skills/echo/references/guide.md
@@ -0,0 +1,19 @@
+# Echo Formatting Guide
+
+## Output format
+
+When echoing text back to the user, follow these rules:
+
+- Preserve the exact input text without any modification
+- Do not add quotation marks around the echoed text
+- Do not add any prefix like "Echo:" or "Output:"
+- Return only the echoed text as the response
+- Preserve whitespace and line breaks exactly as provided
+
+## Examples
+
+**Input**: `Hello World!`
+**Output**: `Hello World!`
+
+**Input**: `multiple words with spaces`
+**Output**: `multiple words with spaces`
diff --git a/tests/e2e/skills/summarize/SKILL.md b/tests/e2e/skills/summarize/SKILL.md
new file mode 100644
index 000000000..b9fd63d55
--- /dev/null
+++ b/tests/e2e/skills/summarize/SKILL.md
@@ -0,0 +1,21 @@
+---
+name: summarize
+description: Summarize text into a
concise single-sentence overview. Use when a user asks to summarize, condense, or shorten text. +--- + +# Summarize Skill + +## When to use this skill + +Use this skill when: +- A user asks to summarize or condense text +- A user wants a brief overview of longer content +- A user requests a TL;DR or short version + +## Instructions + +1. Read the user's input text +2. Identify the key point or main idea +3. Return a single concise sentence that captures the essence of the input + +For formatting guidelines, see [references/guide.md](references/guide.md). diff --git a/tests/e2e/skills/summarize/references/guide.md b/tests/e2e/skills/summarize/references/guide.md new file mode 100644 index 000000000..fa2985dd5 --- /dev/null +++ b/tests/e2e/skills/summarize/references/guide.md @@ -0,0 +1,20 @@ +# Summarize Formatting Guide + +## Output format + +When summarizing text for the user, follow these rules: + +- Return exactly one sentence +- Do not add quotation marks around the summary +- Do not add any prefix like "Summary:" or "TL;DR:" +- Keep the summary under 30 words +- Use simple, clear language +- Preserve the original meaning without adding interpretation + +## Examples + +**Input**: `The quick brown fox jumped over the lazy dog while the cat watched from the windowsill and the bird flew overhead.` +**Output**: `A fox jumped over a dog while a cat and bird observed nearby.` + +**Input**: `We need to upgrade our database server because the current one is running out of disk space and memory, which causes frequent timeouts during peak hours.` +**Output**: `The database server needs upgrading due to insufficient disk space and memory causing peak-hour timeouts.` diff --git a/tests/e2e/test_list.txt b/tests/e2e/test_list.txt index 8d6dd7fa8..6d34ca30d 100644 --- a/tests/e2e/test_list.txt +++ b/tests/e2e/test_list.txt @@ -32,3 +32,4 @@ features/tls-ca.feature features/tls-mtls.feature features/tls-tlsv13.feature features/opentelemetry.feature +features/skills.feature \ 
No newline at end of file