Skip to content

Commit 48cdef8

Browse files
Expose more information on server process pages (#6278)
This change modifies the Monitor server pages to expose as much information as possible so that we can verify the accuracy of the information and determine which information we want to display vs. not display. The following modifications are included: 1. Metric.java was modified to include the label, CSS class, and description to use in the server page table headers. 2. A new class, ColumnJsGen, exports the information from Metric.java to columns.js at build time; the generated file is packaged in the monitor jar file. The columns.js file includes the information in a JavaScript Map object which is referenced when processing the data from the server. 3. Changes in AbstractServer to exclude specific metrics from being included in the getMetrics RPC response. 4. A new class, ServersView, which replaces the class ScanServerView but serves the same purpose. This class returns the columns, data, and status information for use on the server process pages. 5. A new `servers/view` endpoint which takes a matrix parameter for the server type. Example: servers/view;serverType=SCAN_SERVER. This endpoint returns a ServersView object. This endpoint replaces the `sservers/view` endpoint that returned the ScanServerView object. 6. Updates to the SystemInformation.finish method that creates memoized Suppliers for the ServersView object for each server type. When the endpoint is hit the Supplier will calculate the response object the first time, then cache it for subsequent requests. 7. Centralized the JavaScript code for creating the DataTables and HTML table headers into a new file called server_process_common.js. The methods in this file use the information from the ServersView object and columns.js to create the necessary objects to render the tables in the server process pages. 8. Updated the server page js and ftl files accordingly to use the server_process_common.js file and other new items. Co-authored-by: Dom G. <domgarguilo@apache.org>
1 parent 304f52c commit 48cdef8

28 files changed

Lines changed: 2914 additions & 1050 deletions

File tree

LICENSE

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,4 +309,12 @@ Files (in server/monitor/src/main/resources/):
309309
Copyright (c) 2008-2024 SpryMedia Ltd
310310
Licensed under the MIT license (see above)
311311

312+
## DataTables ColReorder 1.7.0 (https://datatables.net)
313+
314+
Files (in server/monitor/src/main/resources/):
315+
* org/apache/accumulo/monitor/resources/external/datatables/**/*
316+
317+
Copyright (c) 2010-2015 SpryMedia Limited
318+
Licensed under the MIT license (see above)
319+
312320
**********

core/src/main/java/org/apache/accumulo/core/metrics/Metric.java

Lines changed: 253 additions & 114 deletions
Large diffs are not rendered by default.

server/base/src/main/java/org/apache/accumulo/server/AbstractServer.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import java.net.UnknownHostException;
2424
import java.util.OptionalInt;
25+
import java.util.Set;
2526
import java.util.concurrent.ScheduledFuture;
2627
import java.util.concurrent.atomic.AtomicBoolean;
2728
import java.util.concurrent.atomic.AtomicReference;
@@ -39,6 +40,7 @@
3940
import org.apache.accumulo.core.conf.SiteConfiguration;
4041
import org.apache.accumulo.core.data.ResourceGroupId;
4142
import org.apache.accumulo.core.lock.ServiceLock;
43+
import org.apache.accumulo.core.metrics.Metric;
4244
import org.apache.accumulo.core.metrics.MetricsProducer;
4345
import org.apache.accumulo.core.process.thrift.MetricResponse;
4446
import org.apache.accumulo.core.process.thrift.MetricSource;
@@ -95,6 +97,7 @@ public static void startServer(AbstractServer server, Logger LOG) throws Excepti
9597
private final AtomicBoolean shutdownRequested = new AtomicBoolean(false);
9698
private final AtomicBoolean shutdownComplete = new AtomicBoolean(false);
9799
private final AtomicBoolean closed = new AtomicBoolean(false);
100+
private final Set<String> monitorMetricExclusions;
98101

99102
protected AbstractServer(ServerId.Type serverType, ServerOpts opts,
100103
BiFunction<SiteConfiguration,ResourceGroupId,ServerContext> serverContextFactory,
@@ -177,6 +180,7 @@ protected AbstractServer(ServerId.Type serverType, ServerOpts opts,
177180
default:
178181
throw new IllegalArgumentException("Unhandled server type: " + serverType);
179182
}
183+
monitorMetricExclusions = Metric.getMonitorExclusions(serverType);
180184
}
181185

182186
/**
@@ -403,10 +407,12 @@ public MetricResponse getMetrics(TInfo tinfo, TCredentials credentials) throws T
403407
if (context.getMetricsInfo().isMetricsEnabled()) {
404408
Metrics.globalRegistry.getMeters().forEach(m -> {
405409
if (m.getId().getName().startsWith("accumulo.")) {
406-
m.match(response::writeMeter, response::writeMeter, response::writeTimer,
407-
response::writeDistributionSummary, response::writeLongTaskTimer,
408-
response::writeMeter, response::writeMeter, response::writeFunctionTimer,
409-
response::writeMeter);
410+
if (!this.monitorMetricExclusions.contains(m.getId().getName())) {
411+
m.match(response::writeMeter, response::writeMeter, response::writeTimer,
412+
response::writeDistributionSummary, response::writeLongTaskTimer,
413+
response::writeMeter, response::writeMeter, response::writeFunctionTimer,
414+
response::writeMeter);
415+
}
410416
}
411417
});
412418
}

server/monitor/pom.xml

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@
5454
<groupId>com.github.ben-manes.caffeine</groupId>
5555
<artifactId>caffeine</artifactId>
5656
</dependency>
57+
<dependency>
58+
<groupId>com.google.code.gson</groupId>
59+
<artifactId>gson</artifactId>
60+
</dependency>
5761
<dependency>
5862
<groupId>com.google.guava</groupId>
5963
<artifactId>guava</artifactId>
@@ -242,6 +246,26 @@
242246
</plugins>
243247
</pluginManagement>
244248
<plugins>
249+
<plugin>
250+
<groupId>org.codehaus.mojo</groupId>
251+
<artifactId>exec-maven-plugin</artifactId>
252+
<executions>
253+
<execution>
254+
<id>metrics-javascript</id>
255+
<goals>
256+
<goal>java</goal>
257+
</goals>
258+
<phase>prepare-package</phase>
259+
<configuration>
260+
<mainClass>org.apache.accumulo.monitor.next.views.ColumnJsGen</mainClass>
261+
<classpathScope>compile</classpathScope>
262+
<arguments>
263+
<argument>${project.build.directory}/classes/org/apache/accumulo/monitor/resources/js/columns.js</argument>
264+
</arguments>
265+
</configuration>
266+
</execution>
267+
</executions>
268+
</plugin>
245269
<plugin>
246270
<groupId>org.apache.maven.plugins</groupId>
247271
<artifactId>maven-resources-plugin</artifactId>

server/monitor/src/main/java/org/apache/accumulo/monitor/next/Endpoints.java

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import jakarta.inject.Inject;
3434
import jakarta.servlet.http.HttpServletRequest;
3535
import jakarta.ws.rs.GET;
36+
import jakarta.ws.rs.MatrixParam;
3637
import jakarta.ws.rs.NotFoundException;
3738
import jakarta.ws.rs.Path;
3839
import jakarta.ws.rs.PathParam;
@@ -54,20 +55,25 @@
5455
import org.apache.accumulo.monitor.next.deployment.DeploymentOverview;
5556
import org.apache.accumulo.monitor.next.ec.CompactorsSummary;
5657
import org.apache.accumulo.monitor.next.ec.CoordinatorSummary;
57-
import org.apache.accumulo.monitor.next.sservers.ScanServerView;
58+
import org.apache.accumulo.monitor.next.views.ServersView;
5859

5960
import io.micrometer.core.instrument.Meter.Id;
6061
import io.micrometer.core.instrument.cumulative.CumulativeDistributionSummary;
6162

6263
@Path("/")
6364
public class Endpoints {
6465
/**
65-
* A {@code String} constant representing supplied resource group in path parameter.
66+
* A {@code String} constant representing the supplied resource group in path parameter.
6667
*/
6768
private static final String GROUP_PARAM_KEY = "group";
6869

6970
/**
70-
* A {@code String} constant representing supplied tableId in path parameter.
71+
* A {@code String} constant representing the server type supplied as a matrix parameter.
72+
*/
73+
private static final String SERVER_TYPE_KEY = "serverType";
74+
75+
/**
76+
* A {@code String} constant representing the supplied tableId in path parameter.
7177
*/
7278
private static final String TABLEID_PARAM_KEY = "tableId";
7379

@@ -271,11 +277,17 @@ public Map<Id,CumulativeDistributionSummary> getScanServerAllMetricSummary() {
271277
}
272278

273279
@GET
274-
@Path("sservers/view")
280+
@Path("servers/view")
275281
@Produces(MediaType.APPLICATION_JSON)
276-
@Description("Returns a UI-ready view model for the Scan Server status page")
277-
public ScanServerView getScanServerPageView() {
278-
return monitor.getInformationFetcher().getSummaryForEndpoint().getScanServerView();
282+
@Description("Returns a UI-ready view model for server processes. Add ';serverType=<ServerId.Type>' to URL")
283+
public ServersView getServerProcessView(@MatrixParam(SERVER_TYPE_KEY) ServerId.Type serverType) {
284+
ServersView view =
285+
monitor.getInformationFetcher().getSummaryForEndpoint().getServerProcessView(serverType);
286+
if (view == null) {
287+
throw new NotFoundException(
288+
"ServersView object for server type " + serverType.name() + " not found");
289+
}
290+
return view;
279291
}
280292

281293
@GET

server/monitor/src/main/java/org/apache/accumulo/monitor/next/InformationFetcher.java

Lines changed: 36 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ public void newConnectionEvent() {
211211
}
212212

213213
// Protect against NPE and wait for initial data gathering
214-
public SystemInformation getSummary() throws InterruptedException {
214+
private SystemInformation getSummary() throws InterruptedException {
215215
while (summaryRef.get() == null) {
216216
Thread.sleep(100);
217217
}
@@ -254,7 +254,7 @@ public void onRemoval(@Nullable ServerId server, @Nullable MetricResponse respon
254254
@Override
255255
public void run() {
256256

257-
long refreshTime = 0;
257+
long lastRunTime = 0;
258258

259259
while (true) {
260260

@@ -263,7 +263,7 @@ public void run() {
263263
// If a connection has not been made in a while, stale data may be displayed.
264264
// Only refresh every 5s (old monitor logic).
265265
while (!newConnectionEvent.get() && connectionCount.get() == 0
266-
&& NanoTime.millisElapsed(refreshTime, NanoTime.now()) > 5000) {
266+
&& NanoTime.millisElapsed(lastRunTime, NanoTime.now()) > 5000) {
267267
try {
268268
Thread.sleep(100);
269269
} catch (InterruptedException e) {
@@ -275,7 +275,7 @@ public void run() {
275275
// reset the connection event flag
276276
newConnectionEvent.compareAndExchange(true, false);
277277

278-
LOG.info("Fetching metrics from servers");
278+
LOG.info("Fetching information from servers");
279279

280280
final List<Future<?>> futures = new ArrayList<>();
281281
final SystemInformation summary = new SystemInformation(allMetrics, this.ctx);
@@ -315,13 +315,18 @@ public void run() {
315315
}
316316
}));
317317

318-
long monitorFetchTimeout =
318+
final long monitorFetchTimeout =
319319
ctx.getConfiguration().getTimeInMillis(Property.MONITOR_FETCH_TIMEOUT);
320-
long allFuturesAdded = NanoTime.now();
320+
final long allFuturesAdded = NanoTime.now();
321321
boolean tookToLong = false;
322322
while (!futures.isEmpty()) {
323323

324324
if (NanoTime.millisElapsed(allFuturesAdded, NanoTime.now()) > monitorFetchTimeout) {
325+
LOG.warn(
326+
"Fetching information for Monitor has taken longer {}. Cancelling all"
327+
+ " remaining tasks and monitor will display old information. Resolve issue"
328+
+ " causing this or increase property {}.",
329+
monitorFetchTimeout, Property.MONITOR_FETCH_TIMEOUT.getKey());
325330
tookToLong = true;
326331
}
327332

@@ -344,26 +349,31 @@ public void run() {
344349
}
345350
}
346351

347-
summary.finish();
348-
349-
refreshTime = NanoTime.now();
350-
LOG.info("Finished fetching metrics from servers");
351-
LOG.info(
352-
"All: {}, Manager: {}, Garbage Collector: {}, Compactors: {}, Scan Servers: {}, Tablet Servers: {}",
353-
allMetrics.estimatedSize(), summary.getManager() != null,
354-
summary.getGarbageCollector() != null,
355-
summary.getCompactorAllMetricSummary().isEmpty() ? 0
356-
: summary.getCompactorAllMetricSummary().entrySet().iterator().next().getValue()
357-
.count(),
358-
summary.getSServerAllMetricSummary().isEmpty() ? 0
359-
: summary.getSServerAllMetricSummary().entrySet().iterator().next().getValue()
360-
.count(),
361-
summary.getTServerAllMetricSummary().isEmpty() ? 0 : summary.getTServerAllMetricSummary()
362-
.entrySet().iterator().next().getValue().count());
363-
364-
SystemInformation oldSummary = summaryRef.getAndSet(summary);
365-
if (oldSummary != null) {
366-
oldSummary.clear();
352+
lastRunTime = NanoTime.now();
353+
354+
if (tookToLong) {
355+
summary.clear();
356+
} else {
357+
summary.finish();
358+
359+
LOG.info("Finished fetching metrics from servers");
360+
LOG.info(
361+
"All: {}, Manager: {}, Garbage Collector: {}, Compactors: {}, Scan Servers: {}, Tablet Servers: {}",
362+
allMetrics.estimatedSize(), summary.getManager() != null,
363+
summary.getGarbageCollector() != null,
364+
summary.getCompactorAllMetricSummary().isEmpty() ? 0
365+
: summary.getCompactorAllMetricSummary().entrySet().iterator().next().getValue()
366+
.count(),
367+
summary.getSServerAllMetricSummary().isEmpty() ? 0
368+
: summary.getSServerAllMetricSummary().entrySet().iterator().next().getValue()
369+
.count(),
370+
summary.getTServerAllMetricSummary().isEmpty() ? 0 : summary
371+
.getTServerAllMetricSummary().entrySet().iterator().next().getValue().count());
372+
373+
SystemInformation oldSummary = summaryRef.getAndSet(summary);
374+
if (oldSummary != null) {
375+
oldSummary.clear();
376+
}
367377
}
368378
}
369379

0 commit comments

Comments
 (0)