diff --git a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs index 03b09f5517..84775c0bf8 100644 --- a/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs +++ b/src/Elastic.Markdown/Exporters/Elasticsearch/ElasticsearchMarkdownExporter.Export.cs @@ -7,6 +7,7 @@ using Elastic.Documentation; using Elastic.Documentation.AppliesTo; using Elastic.Documentation.Configuration.Inference; +using Elastic.Documentation.Extensions; using Elastic.Documentation.Navigation; using Elastic.Documentation.Search; using Elastic.Ingest.Elasticsearch.Indices; @@ -157,6 +158,19 @@ public async ValueTask ExportAsync(MarkdownExportFileContext fileContext, }).ToArray() : null; + var gitHubRepo = fileContext.BuildContext.Git.GitHubRepository; + var branch = fileContext.BuildContext.Git.Branch; + if (gitHubRepo is not null + && fileContext.BuildContext.Git != GitCheckoutInformation.Unavailable + && fileContext.BuildContext.DocumentationCheckoutDirectory is { } checkoutDirectory) + { + var relativeSourcePath = Path.GetRelativePath( + checkoutDirectory.FullName, + fileContext.BuildContext.DocumentationSourceDirectory.FullName); + var path = UrlPath.Join(relativeSourcePath, file.RelativePath); + doc.SourceUrl = $"https://github.com/{gitHubRepo}/blob/{branch}/{path}"; + } + CommonEnrichments(doc, currentNavigation); AssignContentHash(doc); AssignDocumentMetadata(doc); diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs index 3e7629f2f8..b6bba4f6d4 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/DocumentGateway.cs @@ -47,6 +47,7 @@ public class DocumentGateway( e => e.AiQuestions, e => e.AiUseCases, e => e.LastUpdated, + e => e.SourceUrl, e => e.Product, e => e.RelatedProducts ))), @@ -79,6 +80,7 @@ public class DocumentGateway( AiQuestions = doc.AiQuestions, AiUseCases = doc.AiUseCases, LastUpdated = doc.LastUpdated, + SourceUrl = doc.SourceUrl, Product = doc.Product?.Id != null ? new DocumentProduct { Id = doc.Product.Id, diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs index 687b5c999d..881327def1 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Gateways/IDocumentGateway.cs @@ -36,6 +36,7 @@ public record DocumentResult public string[]? AiQuestions { get; init; } public string[]? AiUseCases { get; init; } public DateTimeOffset? LastUpdated { get; init; } + public string? SourceUrl { get; init; } public DocumentParent[] Parents { get; init; } = []; public string[] Headings { get; init; } = []; public string[] Links { get; init; } = []; diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs b/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs index 15f546d238..8838ab28e4 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Responses/McpResponses.cs @@ -110,6 +110,7 @@ public sealed record DocumentResponse public string[]? AiQuestions { get; init; } public string[]? AiUseCases { get; init; } public DateTimeOffset? LastUpdated { get; init; } + public string? SourceUrl { get; init; } public required List Parents { get; init; } public required List Headings { get; init; } public ProductDto? Product { get; init; } diff --git a/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs b/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs index 1081ae12ef..471d169d89 100644 --- a/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs +++ b/src/api/Elastic.Documentation.Mcp.Remote/Tools/DocumentTools.cs @@ -68,6 +68,7 @@ public async Task GetDocumentByUrl( AiQuestions = result.AiQuestions, AiUseCases = result.AiUseCases, LastUpdated = result.LastUpdated, + SourceUrl = result.SourceUrl, Parents = result.Parents.Select(p => new ParentDto { Title = p.Title, diff --git a/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs b/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs index 5921bb412b..72892cfcc2 100644 --- a/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs +++ b/tests-integration/Mcp.Remote.IntegrationTests/DocumentToolsIntegrationTests.cs @@ -72,6 +72,33 @@ public async Task GetDocumentByUrl_NotFound_ReturnsError() Output.WriteLine($"Error message: {errorResponse.Error}"); } + [Fact] + public async Task GetDocumentByUrl_SourceUrlIsGitHubBlobUrlWhenPresent() + { + // Arrange + var (documentTools, clientAccessor) = CreateDocumentTools(); + Assert.SkipUnless(documentTools is not null, "Elasticsearch is not configured"); + LogDiagnostics(clientAccessor); + var canConnect = await clientAccessor!.CanConnect(TestContext.Current.CancellationToken); + Assert.SkipUnless(canConnect, "Elasticsearch is not connected"); + + // Act + var resultJson = await documentTools.GetDocumentByUrl( + "/docs/reference/elasticsearch", + cancellationToken: TestContext.Current.CancellationToken); + + if (resultJson.Contains("\"error\"")) + Assert.Skip("Test document not found in index"); + + var response = JsonSerializer.Deserialize(resultJson, McpJsonContext.Default.DocumentResponse); + response.Should().NotBeNull(); + Output.WriteLine($"source_url: {response!.SourceUrl ?? "(null — document predates indexing of this field)"}"); + + // source_url is null for docs indexed before this field was added; when present it must be a GitHub blob URL + if (response.SourceUrl is not null) + response.SourceUrl.Should().StartWith("https://github.com/").And.Contain("/blob/"); + } + [Fact] public async Task AnalyzeDocumentStructure_ReturnsStructure() { diff --git a/tests/Mcp.Remote.Tests/DocumentToolsTests.cs b/tests/Mcp.Remote.Tests/DocumentToolsTests.cs new file mode 100644 index 0000000000..fe8322242a --- /dev/null +++ b/tests/Mcp.Remote.Tests/DocumentToolsTests.cs @@ -0,0 +1,63 @@ +// Licensed to Elasticsearch B.V under one or more agreements. +// Elasticsearch B.V licenses this file to you under the Apache 2.0 License. +// See the LICENSE file in the project root for more information + +using System.Text.Json; +using AwesomeAssertions; +using Elastic.Documentation.Mcp.Remote.Gateways; +using Elastic.Documentation.Mcp.Remote.Responses; +using Elastic.Documentation.Mcp.Remote.Tools; +using Microsoft.Extensions.Logging.Abstractions; + +namespace Mcp.Remote.Tests; + +public class DocumentToolsTests +{ + [Fact] + public async Task GetDocumentByUrl_MapsSourceUrlIntoResponse() + { + const string expectedSourceUrl = "https://github.com/elastic/docs-content/blob/main/docs/some-page.md"; + var gateway = new StubDocumentGateway(new DocumentResult + { + Url = "/docs/some-page", + Title = "Some Page", + Type = "doc", + SourceUrl = expectedSourceUrl + }); + var tools = new DocumentTools(gateway, NullLogger.Instance); + + var json = await tools.GetDocumentByUrl("/docs/some-page"); + + var response = JsonSerializer.Deserialize(json, McpJsonContext.Default.DocumentResponse); + response.Should().NotBeNull(); + response!.SourceUrl.Should().Be(expectedSourceUrl); + } + + [Fact] + public async Task GetDocumentByUrl_OmitsSourceUrlWhenNull() + { + var gateway = new StubDocumentGateway(new DocumentResult + { + Url = "/docs/some-page", + Title = "Some Page", + Type = "doc", + SourceUrl = null + }); + var tools = new DocumentTools(gateway, NullLogger.Instance); + + var json = await tools.GetDocumentByUrl("/docs/some-page"); + + var response = JsonSerializer.Deserialize(json, McpJsonContext.Default.DocumentResponse); + response.Should().NotBeNull(); + response!.SourceUrl.Should().BeNull(); + } + + private sealed class StubDocumentGateway(DocumentResult? result) : IDocumentGateway + { + public Task GetByUrlAsync(string url, CancellationToken ct = default) => + Task.FromResult(result); + + public Task GetStructureAsync(string url, CancellationToken ct = default) => + Task.FromResult(null); + } +}