diff --git a/tools/ai/hugegraph-deepwiki-skill/.agents/plugins/marketplace.json b/tools/ai/hugegraph-deepwiki-skill/.agents/plugins/marketplace.json new file mode 100644 index 0000000000..7eafefca89 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/.agents/plugins/marketplace.json @@ -0,0 +1,20 @@ +{ + "name": "hugegraph-deepwiki-skill", + "interface": { + "displayName": "HugeGraph Repository Assistant" + }, + "plugins": [ + { + "name": "hugegraph-deepwiki-skill", + "source": { + "source": "local", + "path": "./plugins/hugegraph-deepwiki-skill" + }, + "policy": { + "installation": "AVAILABLE", + "authentication": "ON_INSTALL" + }, + "category": "Developer Tools" + } + ] +} diff --git a/tools/ai/hugegraph-deepwiki-skill/.claude-plugin/marketplace.json b/tools/ai/hugegraph-deepwiki-skill/.claude-plugin/marketplace.json new file mode 100644 index 0000000000..a0cecb8b04 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/.claude-plugin/marketplace.json @@ -0,0 +1,14 @@ +{ + "name": "hugegraph-deepwiki-skill", + "description": "Repository knowledge assistant for Apache HugeGraph.", + "owner": { + "name": "HugeGraph Community" + }, + "plugins": [ + { + "name": "hugegraph-deepwiki-skill", + "source": "./plugins/hugegraph-deepwiki-skill", + "description": "Ask repository-grounded questions about Apache HugeGraph." 
+ } + ] +} diff --git a/tools/ai/hugegraph-deepwiki-skill/README-zh.md b/tools/ai/hugegraph-deepwiki-skill/README-zh.md new file mode 100644 index 0000000000..43f758b0f4 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/README-zh.md @@ -0,0 +1,121 @@ +# HugeGraph 仓库知识助手 + +[English](README.md) | [中文](README-zh.md) + +这个独立模块将 [Apache HugeGraph](https://github.com/apache/hugegraph) 源码仓库问答能力打包为 Claude Code 和 Codex 可安装的 skill。 + +DeepWiki 是底层线上知识库和 MCP 传输通道: + +```text +https://deepwiki.com/apache/hugegraph +https://mcp.deepwiki.com/mcp +``` + +## 功能 + +- 回答 HugeGraph 架构、模块、API、存储后端、schema、traversal、配置、构建、测试和实现细节相关问题。 +- 使用 `read_wiki_contents` 构建本地 DeepWiki wiki 缓存,并优先搜索缓存。 +- 当缓存内容不能直接、精准回答问题时,使用 `ask_question` 获取线上答案。 +- 普通问答不会 clone 上游源码仓库。 + +## 前置要求 + +- Python 3.9 或更高版本,用于运行随附的 MCP 客户端脚本。 +- 当前环境需要能访问 `https://mcp.deepwiki.com/mcp`。 + +## 目录结构 + +```text +tools/ai/hugegraph-deepwiki-skill/ +├── README.md +├── README-zh.md +├── .agents/plugins/marketplace.json +├── .claude-plugin/marketplace.json +└── plugins/hugegraph-deepwiki-skill/ + ├── .claude-plugin/plugin.json + ├── .codex-plugin/plugin.json + └── skills/hugegraph-deepwiki-skill/ + ├── SKILL.md + ├── agents/openai.yaml + ├── references/repos.json + └── scripts/deepwiki_mcp.py +``` + +## Claude Code 安装 + +从当前仓库安装: + +```bash +cd tools/ai/hugegraph-deepwiki-skill +claude plugin marketplace add "$(pwd)" +claude plugin install hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +从已发布分支安装时,先 clone 仓库,再从本地模块路径安装: + +```bash +git clone -b <branch> https://github.com/<owner>/hugegraph.git +cd hugegraph/tools/ai/hugegraph-deepwiki-skill +claude plugin marketplace add "$(pwd)" +claude plugin install hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +手动安装用户级 skill: + +```bash +mkdir -p ~/.claude/skills +cp -R plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill ~/.claude/skills/ +``` + +### 让 Claude Code 自动安装 + +在 HugeGraph 仓库根目录的 Claude Code 里粘贴: + +```text +Install the HugeGraph repository assistant 
from this checkout. Enter `tools/ai/hugegraph-deepwiki-skill`, run `claude plugin marketplace add "$(pwd)"`, then run `claude plugin install hugegraph-deepwiki-skill@hugegraph-deepwiki-skill`. Do not hardcode absolute paths. +``` + +## Codex 安装 + +从当前仓库安装: + +```bash +cd tools/ai/hugegraph-deepwiki-skill +codex plugin marketplace add "$(pwd)" +codex plugin add hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +从已发布分支安装时,先 clone 仓库,再从本地模块路径安装: + +```bash +git clone -b <branch> https://github.com/<owner>/hugegraph.git +cd hugegraph/tools/ai/hugegraph-deepwiki-skill +codex plugin marketplace add "$(pwd)" +codex plugin add hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +如果当前 Codex 版本不能直接安装 plugin,可以安装 raw skill: + +```bash +CODEX_HOME="${CODEX_HOME:-$HOME/.codex}" +mkdir -p "$CODEX_HOME/skills" +cp -R plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill "$CODEX_HOME/skills/" +``` + +### 让 Codex 自动安装 + +在 HugeGraph 仓库根目录的 Codex 里粘贴: + +```text +Install the HugeGraph repository assistant from this checkout. Enter `tools/ai/hugegraph-deepwiki-skill`, run `codex plugin marketplace add "$(pwd)"`, then run `codex plugin add hugegraph-deepwiki-skill@hugegraph-deepwiki-skill`. If this Codex build has no plugin add command, copy `plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill` into `${CODEX_HOME:-$HOME/.codex}/skills`. Do not hardcode absolute paths. +``` + +## 使用方式 + +安装后,可以在提问时显式指定: + +```text +Use $hugegraph-deepwiki-skill to explain HugeGraph schema and traversal behavior. 
+``` + +HugeGraph AI 相关问题请安装 `apache/hugegraph-ai` 仓库中的独立 HugeGraph AI 仓库知识助手。 diff --git a/tools/ai/hugegraph-deepwiki-skill/README.md b/tools/ai/hugegraph-deepwiki-skill/README.md new file mode 100644 index 0000000000..bdc778e803 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/README.md @@ -0,0 +1,121 @@ +# HugeGraph Repository Assistant + +[中文](README-zh.md) | [English](README.md) + +This standalone module packages a Claude Code and Codex skill for answering questions about the [Apache HugeGraph](https://github.com/apache/hugegraph) source repository. + +DeepWiki is used as the online knowledge and MCP transport layer: + +```text +https://deepwiki.com/apache/hugegraph +https://mcp.deepwiki.com/mcp +``` + +## What It Does + +- Answers repository-grounded questions about HugeGraph architecture, modules, APIs, storage backends, schema, traversal, configuration, build, tests, and implementation details. +- Uses `read_wiki_contents` to build a local DeepWiki wiki cache and searches that cache before answering. +- Uses `ask_question` when the cached context does not directly and precisely answer the question. +- Avoids cloning upstream repositories for ordinary Q&A. + +## Requirements + +- Python 3.9 or later for the bundled MCP client script. +- Network access to `https://mcp.deepwiki.com/mcp`. 
+ +## Layout + +```text +tools/ai/hugegraph-deepwiki-skill/ +├── README.md +├── README-zh.md +├── .agents/plugins/marketplace.json +├── .claude-plugin/marketplace.json +└── plugins/hugegraph-deepwiki-skill/ + ├── .claude-plugin/plugin.json + ├── .codex-plugin/plugin.json + └── skills/hugegraph-deepwiki-skill/ + ├── SKILL.md + ├── agents/openai.yaml + ├── references/repos.json + └── scripts/deepwiki_mcp.py +``` + +## Claude Code Install + +From this repository: + +```bash +cd tools/ai/hugegraph-deepwiki-skill +claude plugin marketplace add "$(pwd)" +claude plugin install hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +From a published branch, clone the repository first and install from the local module path: + +```bash +git clone -b <branch> https://github.com/<owner>/hugegraph.git +cd hugegraph/tools/ai/hugegraph-deepwiki-skill +claude plugin marketplace add "$(pwd)" +claude plugin install hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +Manual user-level skill install: + +```bash +mkdir -p ~/.claude/skills +cp -R plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill ~/.claude/skills/ +``` + +### Ask Claude Code To Install It + +Paste this into Claude Code from the HugeGraph repository root: + +```text +Install the HugeGraph repository assistant from this checkout. Enter `tools/ai/hugegraph-deepwiki-skill`, run `claude plugin marketplace add "$(pwd)"`, then run `claude plugin install hugegraph-deepwiki-skill@hugegraph-deepwiki-skill`. Do not hardcode absolute paths. 
+``` + +## Codex Install + +From this repository: + +```bash +cd tools/ai/hugegraph-deepwiki-skill +codex plugin marketplace add "$(pwd)" +codex plugin add hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +From a published branch, clone the repository first and install from the local module path: + +```bash +git clone -b <branch> https://github.com/<owner>/hugegraph.git +cd hugegraph/tools/ai/hugegraph-deepwiki-skill +codex plugin marketplace add "$(pwd)" +codex plugin add hugegraph-deepwiki-skill@hugegraph-deepwiki-skill +``` + +If your Codex build cannot install plugins directly, install the raw skill: + +```bash +CODEX_HOME="${CODEX_HOME:-$HOME/.codex}" +mkdir -p "$CODEX_HOME/skills" +cp -R plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill "$CODEX_HOME/skills/" +``` + +### Ask Codex To Install It + +Paste this into Codex from the HugeGraph repository root: + +```text +Install the HugeGraph repository assistant from this checkout. Enter `tools/ai/hugegraph-deepwiki-skill`, run `codex plugin marketplace add "$(pwd)"`, then run `codex plugin add hugegraph-deepwiki-skill@hugegraph-deepwiki-skill`. If this Codex build has no plugin add command, copy `plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill` into `${CODEX_HOME:-$HOME/.codex}/skills`. Do not hardcode absolute paths. +``` + +## Usage + +After installation, ask for the skill explicitly when needed: + +```text +Use $hugegraph-deepwiki-skill to explain HugeGraph schema and traversal behavior. +``` + +For HugeGraph AI questions, install the separate HugeGraph AI repository assistant from the `apache/hugegraph-ai` repository instead. 
diff --git a/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/.claude-plugin/plugin.json b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/.claude-plugin/plugin.json new file mode 100644 index 0000000000..e07fe169e3 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/.claude-plugin/plugin.json @@ -0,0 +1,13 @@ +{ + "name": "hugegraph-deepwiki-skill", + "description": "Repository knowledge assistant for Apache HugeGraph.", + "version": "0.1.4", + "author": { + "name": "HugeGraph Community" + }, + "homepage": "https://github.com/apache/hugegraph", + "repository": "https://github.com/apache/hugegraph", + "license": "Apache-2.0", + "keywords": ["hugegraph", "deepwiki", "apache", "knowledge-assistant"], + "skills": "./skills/" +} diff --git a/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/.codex-plugin/plugin.json b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/.codex-plugin/plugin.json new file mode 100644 index 0000000000..ebea447542 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/.codex-plugin/plugin.json @@ -0,0 +1,28 @@ +{ + "name": "hugegraph-deepwiki-skill", + "version": "0.1.4", + "description": "Repository knowledge assistant for Apache HugeGraph.", + "author": { + "name": "HugeGraph Community", + "email": "", + "url": "https://github.com/apache/hugegraph" + }, + "homepage": "https://github.com/apache/hugegraph", + "repository": "https://github.com/apache/hugegraph", + "license": "Apache-2.0", + "keywords": ["hugegraph", "deepwiki", "apache", "knowledge-assistant"], + "skills": "./skills/", + "interface": { + "displayName": "HugeGraph Repository Assistant", + "shortDescription": "Ask repository-grounded questions about Apache HugeGraph.", + "longDescription": "Provides an Apache HugeGraph repository knowledge assistant for architecture, API, configuration, storage, traversal, schema, and implementation 
questions. DeepWiki MCP is used as the underlying retrieval channel.", + "developerName": "HugeGraph Community", + "category": "Developer Tools", + "capabilities": ["Knowledge", "MCP"], + "websiteURL": "https://github.com/apache/hugegraph", + "defaultPrompt": [ + "Use $hugegraph-deepwiki-skill to answer my HugeGraph repository question." + ], + "brandColor": "#2563EB" + } +} diff --git a/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/SKILL.md b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/SKILL.md new file mode 100644 index 0000000000..9d65776e96 --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/SKILL.md @@ -0,0 +1,86 @@ +--- +name: hugegraph-deepwiki-skill +description: Use this skill as a repository knowledge assistant for Apache HugeGraph, apache/hugegraph source code, architecture, modules, APIs, configuration, storage backends, Gremlin/traversal behavior, schema/modeling, server/client tooling, build/test workflows, or implementation details. It answers questions grounded in apache/hugegraph and uses the official DeepWiki MCP wiki as the underlying retrieval channel. +metadata: + short-description: Apache HugeGraph repository assistant +--- + +# HugeGraph Repository Knowledge Assistant + +Answer questions about the Apache HugeGraph source repository. Use the official DeepWiki MCP server as the underlying knowledge retrieval channel. + +- Source repository: `https://github.com/apache/hugegraph` +- DeepWiki page: `https://deepwiki.com/apache/hugegraph` +- MCP endpoint: `https://mcp.deepwiki.com/mcp` +- Default repository: `apache/hugegraph` +- Runtime requirements: Python 3.9+ and network access to the MCP endpoint. + +## Default Workflow + +1. Preserve the user's question, including code snippets, version constraints, error messages, and environment details. +2. 
Change directory to this skill directory, the directory containing this `SKILL.md`. +3. Search the local DeepWiki wiki cache for relevant context. If the cache does not exist yet, this command fetches `read_wiki_contents` from DeepWiki once and saves it under the user's cache directory. It prints only relevant snippets, not the full wiki dump: + +```bash +python3 scripts/deepwiki_mcp.py context --repo hugegraph --query "<user question>" +``` + +4. Answer from cached context only when the snippets directly and precisely answer the user's question. If they are merely related background, continue to `ask`. +5. For broad navigation questions, read the wiki structure instead: + +```bash +python3 scripts/deepwiki_mcp.py structure --repo hugegraph +``` + +6. If the cached wiki context does not directly and precisely answer the question, do not answer the user yet. You must use DeepWiki's AI `ask_question` tool to request an online answer: + +```bash +python3 scripts/deepwiki_mcp.py ask --repo hugegraph --question "<user question>" +``` + +7. For `ask`, preserve the user's original question. Do not expand it with extra requirements, long source-reference requests, or your own multi-part prompt; longer generated questions are more likely to time out. +8. If `ask` returns uncertainty, times out, or reports a transport/query error, retry once with the shortest faithful form of the user's original question. If it still fails, say so plainly and answer only from the cached context if it is sufficient. +9. If the user needs source references for an `ask` answer, use the cached context or contents to identify the relevant wiki page snippets and source-file references. `ask` usually returns the final answer plus suggested wiki pages or a DeepWiki search link, not the raw code files used to generate the answer. + +## Routing Rules + +- Use `structure` first for navigation, table-of-contents, or "where should I look?" questions. 
+- Use `context` first for normal Q&A, source-reference requests, and token-efficient grounding. +- Use `ask` after `context` whenever cached snippets do not provide a direct and precise answer, or when the question needs synthesis across multiple areas. Do not answer directly from related-but-insufficient cached snippets. +- If both an online answer and source references are needed, run `ask` for the answer and use `context` to collect source references. +- Do not clone the repository for ordinary Q&A or verification. If current source verification is truly required, prefer online source links or raw GitHub files and clearly distinguish that from DeepWiki-grounded content. + +## When to Read Structure or Pages + +For broad navigation questions, or when the user asks where something lives, inspect the wiki structure: + +```bash +cd <skill-directory> +python3 scripts/deepwiki_mcp.py structure --repo hugegraph +``` + +If the user needs a fuller wiki dump for offline review or synthesis, read the wiki contents: + +```bash +cd <skill-directory> +python3 scripts/deepwiki_mcp.py contents --repo hugegraph +``` + +The `contents` command uses the same local cache by default. Use `--refresh` only when the user explicitly needs a fresh DeepWiki snapshot. + +For normal Q&A, prefer `context` over `contents` so only the relevant cached snippets enter the model context. When the cached wiki context does not directly and precisely answer the question, run `ask` for an online DeepWiki answer before responding. + +## Repository Profile + +The repository alias lives in `references/repos.json`. + +- `hugegraph` maps to `apache/hugegraph`. +- For Apache HugeGraph AI questions, use the separate `hugegraph-ai-deepwiki-skill` instead of this skill. + +## Answering Guidance + +- Keep responses practical: include class/module names, configuration keys, command names, or API names when DeepWiki provides them. +- Prefer online DeepWiki retrieval and cached wiki search. 
Do not clone the source repository just to answer a question. +- If the user asks for code changes in a local HugeGraph checkout, use DeepWiki for orientation, then inspect and edit the local repository directly. +- Do not invent details that DeepWiki does not provide. Clearly distinguish DeepWiki-grounded facts from your own inference. +- For version-sensitive release, dependency, or API-compatibility questions, verify with the live repository or official docs when the user needs current facts beyond the DeepWiki answer. diff --git a/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/agents/openai.yaml b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/agents/openai.yaml new file mode 100644 index 0000000000..48842c8fbe --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/agents/openai.yaml @@ -0,0 +1,30 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +interface: + display_name: "HugeGraph Repository Assistant" + short_description: "Answers Apache HugeGraph repository questions" + default_prompt: "Use $hugegraph-deepwiki-skill to answer my Apache HugeGraph repository question." + +dependencies: + tools: + - type: "mcp" + value: "deepwiki" + description: "Official DeepWiki MCP server used as the repository knowledge retrieval channel" + transport: "streamable_http" + url: "https://mcp.deepwiki.com/mcp" + +policy: + allow_implicit_invocation: true diff --git a/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/references/repos.json b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/references/repos.json new file mode 100644 index 0000000000..86663b913c --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/references/repos.json @@ -0,0 +1,9 @@ +{ + "hugegraph": { + "repoName": "apache/hugegraph", + "deepwiki": "https://deepwiki.com/apache/hugegraph", + "github": "https://github.com/apache/hugegraph", + "enabled": true, + "description": "Apache HugeGraph core graph database repository" + } +} diff --git a/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/scripts/deepwiki_mcp.py b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/scripts/deepwiki_mcp.py new file mode 100755 index 0000000000..c8b81df3ca --- /dev/null +++ b/tools/ai/hugegraph-deepwiki-skill/plugins/hugegraph-deepwiki-skill/skills/hugegraph-deepwiki-skill/scripts/deepwiki_mcp.py @@ -0,0 +1,572 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
def env_float(name: str, default: float) -> float:
    """Read a finite floating-point setting from the environment.

    Args:
        name: Environment variable to look up.
        default: Value returned unchanged when the variable is not set.

    Returns:
        The parsed value of the variable, or ``default`` when it is unset.

    Raises:
        McpError: If the variable is set but is not a finite number.
    """
    import math

    raw_value = os.environ.get(name)
    if raw_value is None:
        return default
    try:
        value = float(raw_value)
    except ValueError as exc:
        raise McpError(f"{name} must be a number, got {raw_value!r}.") from exc
    # float() also parses "nan" and "inf". The result is used as a timeout in
    # seconds, where neither is meaningful, so reject them with the same
    # error type as any other malformed value.
    if not math.isfinite(value):
        raise McpError(f"{name} must be a finite number, got {raw_value!r}.")
    return value
def load_repos() -> dict[str, dict[str, Any]]:
    """Load the repository alias profiles stored at ``REPOS_PATH``.

    Returns:
        The parsed top-level JSON object from the profile file.

    Raises:
        McpError: If the file is missing, cannot be read, is not valid UTF-8,
            is not valid JSON, or does not contain a JSON object at the top
            level.
    """
    try:
        with REPOS_PATH.open("r", encoding="utf-8") as file:
            repos = json.load(file)
    except FileNotFoundError as exc:
        raise McpError(f"Repository profile file is missing: {REPOS_PATH}") from exc
    except json.JSONDecodeError as exc:
        raise McpError(f"Repository profile file is not valid JSON: {REPOS_PATH}") from exc
    except UnicodeDecodeError as exc:
        # Raised while decoding the file contents, before JSON parsing starts.
        raise McpError(f"Repository profile file is not valid UTF-8: {REPOS_PATH}") from exc
    except OSError as exc:
        # Permission errors, directories in place of the file, I/O failures.
        raise McpError(f"Repository profile file could not be read: {REPOS_PATH} ({exc})") from exc

    if not isinstance(repos, dict):
        raise McpError(f"Repository profile file must contain a JSON object: {REPOS_PATH}")
    return repos
def write_text_atomic(path: Path, text: str) -> None:
    """Write ``text`` to ``path`` through a temporary file in the same directory.

    The text is first written as UTF-8 to a uniquely named ``<name>.*.tmp``
    file next to ``path`` and then moved over ``path`` with ``Path.replace``.
    Missing parent directories are created.

    Args:
        path: Destination file.
        text: Content to store.

    Raises:
        OSError: If the directory or file cannot be created, written or moved.
        UnicodeError: If ``text`` cannot be encoded as UTF-8.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path: Optional[Path] = None
    try:
        with tempfile.NamedTemporaryFile(
            "w",
            encoding="utf-8",
            dir=path.parent,
            prefix=f"{path.name}.",
            suffix=".tmp",
            delete=False,
        ) as tmp_file:
            # Remember the name before writing: the file is created with
            # delete=False, so a failed write must still be cleaned up below.
            tmp_path = Path(tmp_file.name)
            tmp_file.write(text)
        tmp_path.replace(path)
        # The temporary name no longer exists after a successful move.
        tmp_path = None
    finally:
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
class McpClient:
    """Small JSON-RPC 2.0 client that talks to an MCP endpoint over HTTP POST.

    The client keeps two pieces of state between calls: the ``Mcp-Session-Id``
    most recently returned by the server (echoed back on later requests) and
    the next JSON-RPC request id.
    """

    def __init__(self, endpoint: str, protocol_version: str) -> None:
        """Validate ``endpoint`` and initialise the session state.

        Args:
            endpoint: URL of the MCP endpoint; must use ``http`` or ``https``.
            protocol_version: Value sent as ``Mcp-Protocol-Version`` and in
                the ``initialize`` request.

        Raises:
            McpError: If the endpoint scheme is not ``http`` or ``https``.
        """
        parsed_endpoint = urllib.parse.urlparse(endpoint)
        if parsed_endpoint.scheme not in {"http", "https"}:
            # A URL without a scheme parses to "", which would otherwise
            # leave the end of the error message blank.
            scheme = parsed_endpoint.scheme or "<missing>"
            raise McpError(f"Unsupported DeepWiki MCP endpoint scheme: {scheme}")
        self.endpoint = endpoint
        self.protocol_version = protocol_version
        self.session_id: Optional[str] = None
        self.next_id = 1

    def request(self, payload: dict[str, Any], expect_response: bool = True) -> Optional[dict[str, Any]]:
        """POST one JSON-RPC message and return the decoded reply.

        Args:
            payload: JSON-RPC message to send.
            expect_response: When false, the body of the HTTP response is not
                read and ``None`` is returned.

        Returns:
            The decoded JSON-RPC response object, or ``None`` when
            ``expect_response`` is false.

        Raises:
            McpError: On HTTP errors, timeouts, unreachable endpoints, empty or
                malformed bodies, or a JSON-RPC ``error`` member in the reply.
        """
        body = json.dumps(payload).encode("utf-8")
        headers = {
            "Accept": "application/json, text/event-stream",
            "Content-Type": "application/json",
            "Mcp-Protocol-Version": self.protocol_version,
            "User-Agent": f"{CLIENT_NAME}/{CLIENT_VERSION}",
        }
        if self.session_id:
            headers["Mcp-Session-Id"] = self.session_id

        req = urllib.request.Request(self.endpoint, data=body, headers=headers, method="POST")
        # Resolve the timeout once so the error messages below report the
        # value that was actually applied to this request.
        timeout = stream_timeout_seconds()
        try:
            with urllib.request.urlopen(req, timeout=timeout) as response:
                session_id = response.headers.get("Mcp-Session-Id")
                if session_id:
                    self.session_id = session_id
                if not expect_response:
                    return None
                content_type = response.headers.get("Content-Type", "")
                if "text/event-stream" in content_type:
                    # Streamed reply: pick the event matching this request id.
                    parsed = read_sse_response(response, payload.get("id"))
                else:
                    text = response.read().decode("utf-8", errors="replace")
                    if not text.strip():
                        raise McpError("DeepWiki MCP returned an empty response.")
                    parsed = parse_json(text)
        except urllib.error.HTTPError as exc:
            details = exc.read().decode("utf-8", errors="replace")
            content_type = exc.headers.get("Content-Type", "unknown")
            raise McpError(
                f"DeepWiki MCP HTTP {exc.code} ({content_type}): {preview_text(details)}"
            ) from exc
        except (TimeoutError, socket.timeout) as exc:
            raise McpError(f"DeepWiki MCP request timed out after {timeout:.0f}s.") from exc
        except urllib.error.URLError as exc:
            # urllib wraps connect-phase timeouts in URLError; report them
            # the same way as read timeouts.
            if isinstance(exc.reason, (TimeoutError, socket.timeout)):
                raise McpError(f"DeepWiki MCP request timed out after {timeout:.0f}s.") from exc
            raise McpError(f"Could not reach DeepWiki MCP endpoint: {exc.reason}") from exc

        if "error" in parsed:
            raise McpError(f"DeepWiki MCP error: {json.dumps(parsed['error'], ensure_ascii=False)}")
        return parsed

    def rpc(self, method: str, params: Optional[dict[str, Any]] = None) -> dict[str, Any]:
        """Send a JSON-RPC request with a fresh id and return its response.

        Raises:
            McpError: If the request fails or yields no response object.
        """
        payload: dict[str, Any] = {"jsonrpc": "2.0", "id": self.next_id, "method": method}
        self.next_id += 1
        if params is not None:
            payload["params"] = params
        result = self.request(payload)
        if result is None:
            raise McpError(f"DeepWiki MCP returned no response for {method}.")
        return result

    def notify(self, method: str, params: Optional[dict[str, Any]] = None) -> None:
        """Send a JSON-RPC notification (no id) and ignore any response body."""
        payload: dict[str, Any] = {"jsonrpc": "2.0", "method": method}
        if params is not None:
            payload["params"] = params
        self.request(payload, expect_response=False)

    def initialize(self) -> None:
        """Send ``initialize`` followed by the ``notifications/initialized`` notice."""
        self.rpc(
            "initialize",
            {
                "protocolVersion": self.protocol_version,
                "capabilities": {},
                "clientInfo": {"name": CLIENT_NAME, "version": CLIENT_VERSION},
            },
        )
        self.notify("notifications/initialized", {})

    def call_tool(self, name: str, arguments: dict[str, Any]) -> Any:
        """Invoke ``tools/call`` and return the ``result`` member of the reply."""
        response = self.rpc("tools/call", {"name": name, "arguments": arguments})
        return response.get("result")
def search_cached_context(contents: str, query: str, limit: int) -> list[tuple[int, int, int, str]]:
    """Return the best-scoring, non-overlapping windows of ``contents`` for ``query``.

    The text is split into lines and scanned in windows of
    ``CONTEXT_WINDOW_SIZE`` lines, advancing ``CONTEXT_STRIDE`` lines at a
    time. Each non-empty window is lower-cased and scored with
    ``score_window`` against the patterns built from ``query_terms(query)``;
    only windows with a positive score are kept.

    Args:
        contents: Full cached wiki text to search.
        query: Free-form question or keywords.
        limit: Maximum number of snippets to return. Zero or a negative
            value yields an empty list.

    Returns:
        Up to ``limit`` tuples ``(score, first_line, last_line, text)``,
        highest score first, where the line numbers are 1-based and
        inclusive and ``text`` is the stripped window. A window is dropped
        if its line range intersects one that was already selected.
    """
    # Without this guard the loop below would append one candidate before
    # ever comparing against ``limit``, returning a snippet for limit <= 0.
    if limit <= 0:
        return []

    terms = query_terms(query)
    if not terms:
        return []
    patterns = build_term_patterns(terms)

    lines = contents.splitlines()
    candidates: list[tuple[int, int, int, str]] = []

    for start in range(0, len(lines), CONTEXT_STRIDE):
        end = min(len(lines), start + CONTEXT_WINDOW_SIZE)
        window = "\n".join(lines[start:end]).strip()
        if not window:
            continue
        score = score_window(window.lower(), patterns)
        if score > 0:
            # ``start`` is a 0-based index and ``end`` an exclusive bound, so
            # (start + 1, end) is the same range as 1-based inclusive lines.
            candidates.append((score, start + 1, end, window))

    # list.sort is stable, so equally scored windows keep document order.
    candidates.sort(key=lambda item: item[0], reverse=True)

    selected: list[tuple[int, int, int, str]] = []
    for candidate in candidates:
        _, start, end, _ = candidate
        overlaps = any(
            start <= kept_end and end >= kept_start
            for _, kept_start, kept_end, _ in selected
        )
        if overlaps:
            continue
        selected.append(candidate)
        if len(selected) >= limit:
            break
    return selected
def main() -> int:
    """Run the command-line interface and return the process exit status.

    Parses the arguments, builds an ``McpClient`` for the chosen endpoint and
    protocol version, and dispatches on the sub-command. Any ``McpError``
    raised while doing so is reported on stderr and mapped to exit status 2;
    a successful run returns 0.
    """
    parser = build_parser()
    args = parser.parse_args()
    command = args.command

    try:
        client = McpClient(args.endpoint, args.protocol_version)
        if command == "tools":
            # Troubleshooting helper: no repository is involved here.
            client.initialize()
            listing = client.rpc("tools/list", {}).get("result")
            print(json.dumps(listing, ensure_ascii=False, indent=2))
        elif command in ("ask", "structure", "contents", "context"):
            # Every remaining sub-command works on a resolved repository.
            repo_name = resolve_repo(args.repo)
            if command == "ask":
                output_tool_result(
                    client,
                    "ask_question",
                    {"repoName": repo_name, "question": args.question},
                )
            elif command == "structure":
                output_tool_result(client, "read_wiki_structure", {"repoName": repo_name})
            elif command == "contents":
                contents_text, _path, _status = ensure_cached_contents(
                    client, repo_name, args.refresh
                )
                print(contents_text)
            else:
                output_context(client, repo_name, args.query, args.limit, args.refresh)
        else:
            parser.error(f"Unhandled command {command}")
    except McpError as exc:
        print(f"deepwiki_mcp.py: {exc}", file=sys.stderr)
        return 2
    return 0
class DeepWikiMcpTest(unittest.TestCase):
    """Unit tests for the DeepWiki MCP helper script loaded as ``mcp``.

    Multi-item ``with`` statements use the comma form rather than the
    parenthesized form, which is only documented from Python 3.10, so the
    tests parse on every interpreter the module supports.
    """

    # Both the short alias and the full "owner/name" form resolve to the
    # same repository name.
    def test_resolve_repo_accepts_alias_and_full_repo_name(self):
        self.assertEqual("apache/hugegraph", mcp.resolve_repo("hugegraph"))
        self.assertEqual("apache/hugegraph", mcp.resolve_repo("apache/hugegraph"))

    # A TimeoutError from the very first readline() becomes an McpError that
    # names the awaited response id.
    def test_read_sse_response_reports_socket_timeout(self):
        with mock.patch.dict(
            os.environ, {"DEEPWIKI_MCP_STREAM_TIMEOUT": "1"}
        ), self.assertRaisesRegex(
            mcp.McpError, "timed out waiting for response id 7"
        ):
            mcp.read_sse_response(TimeoutResponse(), 7)

    # Same expectation when the timeout follows an incomplete event line.
    def test_read_sse_response_reports_partial_event_timeout(self):
        with mock.patch.dict(
            os.environ, {"DEEPWIKI_MCP_STREAM_TIMEOUT": "1"}
        ), self.assertRaisesRegex(
            mcp.McpError, "timed out waiting for response id 7"
        ):
            mcp.read_sse_response(PartialTimeoutResponse(), 7)

    # If the cache cannot be written, the fetched text is still returned and
    # the status string mentions the skipped write.
    def test_cache_write_failure_returns_fetched_contents(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            cache_path = Path(tmp_dir) / "apache__hugegraph" / "wiki-contents.md"
            with mock.patch.object(
                mcp, "contents_cache_path", return_value=cache_path
            ), mock.patch.object(
                mcp, "read_wiki_contents", return_value="fresh wiki"
            ) as read_wiki, mock.patch.object(
                mcp, "write_text_atomic", side_effect=OSError("readonly")
            ):
                text, path, status = mcp.ensure_cached_contents(
                    object(), "apache/hugegraph"
                )

            self.assertEqual("fresh wiki", text)
            self.assertEqual(cache_path, path)
            self.assertIn("cache write skipped", status)
            read_wiki.assert_called_once()

    # A cache file that is not valid UTF-8 is ignored, refetched and
    # overwritten with the fresh contents.
    def test_bad_cached_contents_are_refetched(self):
        with tempfile.TemporaryDirectory() as tmp_dir:
            cache_path = Path(tmp_dir) / "apache__hugegraph" / "wiki-contents.md"
            cache_path.parent.mkdir(parents=True)
            cache_path.write_bytes(b"\xff\xfe")
            with mock.patch.object(
                mcp, "contents_cache_path", return_value=cache_path
            ), mock.patch.object(
                mcp, "read_wiki_contents", return_value="fresh wiki"
            ) as read_wiki:
                text, path, status = mcp.ensure_cached_contents(
                    object(), "apache/hugegraph"
                )

            self.assertEqual("fresh wiki", text)
            self.assertEqual(cache_path, path)
            self.assertEqual("refreshed from DeepWiki", status)
            self.assertEqual("fresh wiki", cache_path.read_text(encoding="utf-8"))
            read_wiki.assert_called_once()

    # Two matching regions far apart in the text yield two positively scored
    # snippets whose line ranges do not intersect.
    def test_cached_context_selects_scored_non_overlapping_snippets(self):
        lines = ["overview"] * 80
        lines[5] = "Gremlin traversal examples explain graph query execution."
        lines[50] = "Gremlin traversal cache context covers answer routing."

        matches = mcp.search_cached_context("\n".join(lines), "gremlin traversal", 2)

        self.assertEqual(2, len(matches))
        self.assertGreater(matches[0][0], 0)
        self.assertGreater(matches[1][0], 0)
        self.assertFalse(
            matches[0][1] <= matches[1][2] and matches[0][2] >= matches[1][1]
        )