From 73a492f6f22f46d6037975f5ab41eb25b09b9bc6 Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Fri, 5 Jun 2026 14:08:16 -0700 Subject: [PATCH] Add source_url to MCP get_document_by_url output The MCP get_document_by_url tool now returns a source_url field containing the GitHub blob URL for the source file of each documentation page. The URL is computed at indexing time using the same git context that powers the existing "edit this page" link in the frontend, and stored as a keyword field in the Elasticsearch index. API reference docs (sourced from the temporary OpenAPI exporter) will have a null source_url until the planned Elastic.ApiExplorer pipeline is in place. Tests cover the mapping path with a stub gateway and validate the URL format against a live index. --- .../ElasticsearchMarkdownExporter.Export.cs | 14 +++++ .../Gateways/DocumentGateway.cs | 2 + .../Gateways/IDocumentGateway.cs | 1 + .../Responses/McpResponses.cs | 1 + .../Tools/DocumentTools.cs | 1 + .../DocumentationDocument.cs | 5 ++ .../DocumentToolsIntegrationTests.cs | 27 ++++++++ tests/Mcp.Remote.Tests/DocumentToolsTests.cs | 63 +++++++++++++++++++ 8 files changed, 114 insertions(+) create mode 100644 tests/Mcp.Remote.Tests/DocumentToolsTests.cs diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs index b44d144135..4175e07123 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs @@ -7,6 +7,7 @@ using Elastic.Documentation; using Elastic.Documentation.AppliesTo; using Elastic.Documentation.Configuration.Inference; +using Elastic.Documentation.Extensions; using Elastic.Documentation.Navigation; using Elastic.Documentation.Search; using Elastic.Ingest.Elasticsearch.Indices; @@ -157,6 +158,19 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, }).ToArray() : null; + var gitHubRepo = fileContext.BuildContext.Git.GitHubRepository; + var branch = fileContext.BuildContext.Git.Branch; + if (gitHubRepo is not null + && fileContext.BuildContext.Git != GitCheckoutInformation.Unavailable + && fileContext.BuildContext.DocumentationCheckoutDirectory is { } checkoutDirectory) + { + var relativeSourcePath = Path.GetRelativePath( + checkoutDirectory.FullName, + fileContext.BuildContext.DocumentationSourceDirectory.FullName); + var path = UrlPath.Join(relativeSourcePath, file.RelativePath); + doc.SourceUrl = $"https://github.com/{gitHubRepo}/blob/{branch}/{path}"; + } + CommonEnrichments(doc, currentNavigation); AssignContentHash(doc); AssignDocumentMetadata(doc); diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs index 6393674328..3dc621c9b9 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs @@ -44,6 +44,7 @@ public class DocumentGateway( e => e.AiQuestions, e => e.AiUseCases, e => e.LastUpdated, + e => e.SourceUrl, e => e.Product, e => e.RelatedProducts ))), @@ -76,6 +77,7 @@ public class DocumentGateway( AiQuestions = doc.AiQuestions, AiUseCases = doc.AiUseCases, LastUpdated = doc.LastUpdated, + SourceUrl = doc.SourceUrl, Product = doc.Product?.Id != null ? new DocumentProduct { Id = doc.Product.Id, diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs index 687b5c999d..881327def1 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs @@ -36,6 +36,7 @@ public record DocumentResult public string[]? AiQuestions { get; init; } public string[]? AiUseCases { get; init; } public DateTimeOffset? LastUpdated { get; init; } + public string? SourceUrl { get; init; } public DocumentParent[] Parents { get; init; } = []; public string[] Headings { get; init; } = []; public string[] Links { get; init; } = []; diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs b/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs index 7797e9e879..149c93138b 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs @@ -110,6 +110,7 @@ public sealed record DocumentResponse public string[]? AiQuestions { get; init; } public string[]? AiUseCases { get; init; } public DateTimeOffset? LastUpdated { get; init; } + public string? SourceUrl { get; init; } public required List Parents { get; init; } public required List Headings { get; init; } public ProductDto? Product { get; init; } diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs b/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs index 1081ae12ef..471d169d89 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs @@ -68,6 +68,7 @@ public async Task GetDocumentByUrl( AiQuestions = result.AiQuestions, AiUseCases = result.AiUseCases, LastUpdated = result.LastUpdated, + SourceUrl = result.SourceUrl, Parents = result.Parents.Select(p => new ParentDto { Title = p.Title, diff --git a/src/services/search/Elastic.Documentation.Search.Contract/DocumentationDocument.cs b/src/services/search/Elastic.Documentation.Search.Contract/DocumentationDocument.cs index 75244d6856..75c9db3482 100644 --- a/src/services/search/Elastic.Documentation.Search.Contract/DocumentationDocument.cs +++ b/src/services/search/Elastic.Documentation.Search.Contract/DocumentationDocument.cs @@ -99,6 +99,11 @@ public string ContentType [JsonPropertyName("navigation_section")] public string? NavigationSection { get; set; } + [Keyword] + [JsonPropertyName("source_url")] + [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] + public string? SourceUrl { get; set; } + /// The date of the batch update this document was part of last. /// This date could be higher than the date_last_updated. [BatchIndexDate] diff --git a/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs b/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs index 5921bb412b..72892cfcc2 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs +++ b/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs @@ -72,6 +72,33 @@ public async Task GetDocumentByUrl_NotFound_ReturnsError() Output.WriteLine($"Error message: {errorResponse.Error}"); } + [Fact] + public async Task GetDocumentByUrl_SourceUrlIsGitHubBlobUrlWhenPresent() + { + // Arrange + var (documentTools, clientAccessor) = CreateDocumentTools(); + Assert.SkipUnless(documentTools is not null, "Elasticsearch is not configured"); + LogDiagnostics(clientAccessor); + var canConnect = await clientAccessor!.CanConnect(TestContext.Current.CancellationToken); + Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); + + // Act + var resultJson = await documentTools.GetDocumentByUrl( + "/docs/reference/elasticsearch", + cancellationToken: TestContext.Current.CancellationToken); + + if (resultJson.Contains("\"error\"")) + Assert.Skip("Test document not found in index"); + + var response = JsonSerializer.Deserialize(resultJson, McpJsonContext.Default.DocumentResponse); + response.Should().NotBeNull(); + Output.WriteLine($"source_url: {response!.SourceUrl ?? "(null — document predates indexing of this field)"}"); + + // source_url is null for docs indexed before this field was added; when present it must be a GitHub blob URL + if (response.SourceUrl is not null) + response.SourceUrl.Should().StartWith("https://github.com/").And.Contain("/blob/"); + } + [Fact] public async Task AnalyzeDocumentStructure_ReturnsStructure() { diff --git a/tests/Mcp.Remote.Tests/DocumentToolsTests.cs b/tests/Mcp.Remote.Tests/DocumentToolsTests.cs new file mode 100644 index 0000000000..fe8322242a --- /dev/null +++ b/tests/Mcp.Remote.Tests/DocumentToolsTests.cs @@ -0,0 +1,63 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Text.Json; +using AwesomeAssertions; +using Elastic.Documentation.Mcp.Remote.Gateways; +using Elastic.Documentation.Mcp.Remote.Responses; +using Elastic.Documentation.Mcp.Remote.Tools; +using Microsoft.Extensions.Logging.Abstractions; + +namespace Mcp.Remote.Tests; + +public class DocumentToolsTests +{ + [Fact] + public async Task GetDocumentByUrl_MapsSourceUrlIntoResponse() + { + const string expectedSourceUrl = "https://github.com/elastic/docs-content/blob/main/docs/some-page.md"; + var gateway = new StubDocumentGateway(new DocumentResult + { + Url = "/docs/some-page", + Title = "Some Page", + Type = "doc", + SourceUrl = expectedSourceUrl + }); + var tools = new DocumentTools(gateway, NullLogger.Instance); + + var json = await tools.GetDocumentByUrl("/docs/some-page"); + + var response = JsonSerializer.Deserialize(json, McpJsonContext.Default.DocumentResponse); + response.Should().NotBeNull(); + response!.SourceUrl.Should().Be(expectedSourceUrl); + } + + [Fact] + public async Task GetDocumentByUrl_OmitsSourceUrlWhenNull() + { + var gateway = new StubDocumentGateway(new DocumentResult + { + Url = "/docs/some-page", + Title = "Some Page", + Type = "doc", + SourceUrl = null + }); + var tools = new DocumentTools(gateway, NullLogger.Instance); + + var json = await tools.GetDocumentByUrl("/docs/some-page"); + + var response = JsonSerializer.Deserialize(json, McpJsonContext.Default.DocumentResponse); + response.Should().NotBeNull(); + response!.SourceUrl.Should().BeNull(); + } + + private sealed class StubDocumentGateway(DocumentResult? result) : IDocumentGateway + { + public Task GetByUrlAsync(string url, CancellationToken ct = default) => + Task.FromResult(result); + + public Task GetStructureAsync(string url, CancellationToken ct = default) => + Task.FromResult(null); + } +}