Skip to content

Commit 65e94f0

Browse files
authored
improve: service console output reading (#1605)
### Motivation Currently service process output streams are read once per second on a single shared thread. This causes issues with delayed logs, especially when the JVM crashes at startup as 99% of the times the JVM crash reason wasn't read from the process error stream. This leaves users with nearly undiagnosable startup issues. Example log output with an invalid JVM flag before the change: ``` 4.0.0-RC11-SNAPSHOT-f66323ca => create by Proxy 1 --start [26.08 09:26:50.417] INFO : Starting to create 1 services for Proxy [26.08 09:26:50.641] INFO : CloudService [uniqueId=04789662-d056-48ba-975a-f69993ed56dd task=Proxy name=Proxy-1] has been started [26.08 09:26:50.642] INFO : The services were created based on the task. They can be managed with the service command [26.08 09:26:50.703] INFO : CloudService [uniqueId=04789662-d056-48ba-975a-f69993ed56dd task=Proxy name=Proxy-1] has been stopped ``` ### Modification Now the log cache uses a scaling task executor paired with a custom non-blocking input stream reader to read the console output produced by services. Due to the better buffering and faster read times, more log output should be catched more quickly which also includes jvm crashes. The result is not perfect, but way better than it was before. Additionally, empty log lines are now ignored in the parent log cache class and there is no need anymore for log caches to implement the handling themselfes. Also a small issue with caching was fixed that caused the cache to still contain 1 item even if the log cache entry limit was set to 0. ### Result Log output of services is not read immediately once data is available from the input or output stream, making the output more responsive (as it is nearly immediately printed) and also catches everything that is being logged. Example log output with an invalid JVM flag & this change applied: ``` 4.0.0-RC11-SNAPSHOT-b99d7fbc => create by Proxy 1 --start [26.08 09:21:59.245] INFO : Starting to create 1 services for Proxy [26.08 09:21:59.289] INFO : CloudService [uniqueId=3a718134-d248-41d4-9f40-75751e1c1d64 task=Proxy name=Proxy-1] has been started [26.08 09:21:59.289] INFO : The services were created based on the task. They can be managed with the service command [26.08 09:21:59.295] WARN : [Proxy-1/WARN]: Unrecognized option: -XIReallyKnowWhatIAmDoingISwear [26.08 09:21:59.296] WARN : [Proxy-1/WARN]: Error: Could not create the Java Virtual Machine. [26.08 09:21:59.296] WARN : [Proxy-1/WARN]: Error: A fatal exception has occurred. Program will exit. [26.08 09:22:00.303] INFO : CloudService [uniqueId=3a718134-d248-41d4-9f40-75751e1c1d64 task=Proxy name=Proxy-1] has been stopped ```
1 parent a342ebc commit 65e94f0

14 files changed

Lines changed: 632 additions & 146 deletions

File tree

.idea/codeStyles/Project.xml

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

checkstyle.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
"https://checkstyle.org/dtds/configuration_1_3.dtd">
2121
<module name="Checker">
2222
<module name="BeforeExecutionExclusionFileFilter">
23-
<property name="fileNamePattern" value="(module\-info\.java$)|(.*[\\|\/]fabric[\\|\/].*$)"/>
23+
<property name="fileNamePattern" value="(module\-info\.java$)|(.*[\\|\/]fabric[\\|\/].*|(JVM|Dockerized)Service\.java$)"/>
2424
</module>
2525
<module name="SuppressionFilter">
2626
<property default="checkstyle-suppressions.xml" name="file"

modules/dockerized-services/impl/src/main/java/eu/cloudnetservice/modules/docker/impl/DockerizedService.java

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ public class DockerizedService extends JVMService {
8989

9090
protected final DockerClient dockerClient;
9191
protected final DockerConfiguration configuration;
92-
protected final DockerizedServiceLogCache logCache;
9392

9493
protected volatile String containerId;
9594

@@ -109,21 +108,20 @@ protected DockerizedService(
109108
@NonNull DockerClient dockerClient,
110109
@NonNull DockerConfiguration dockerConfiguration
111110
) {
111+
var logCache = new DockerizedServiceLogCache(nodeConfig, configuration.serviceId());
112112
super(
113113
i18n,
114114
tickLoop,
115115
nodeConfig,
116116
configuration,
117117
manager,
118118
eventManager,
119+
logCache,
119120
versionProvider,
120121
serviceConfigurationPreparer);
121122

122123
this.dockerClient = dockerClient;
123124
this.configuration = dockerConfiguration;
124-
125-
super.logCache = this.logCache = new DockerizedServiceLogCache(nodeConfig, this);
126-
this.initLogHandler();
127125
}
128126

129127
@Override
@@ -379,8 +377,8 @@ protected boolean needsImagePull(@NonNull DockerImage image) {
379377
public final class ServiceLogCacheAdapter extends ResultCallback.Adapter<Frame> {
380378

381379
@Override
382-
public void onNext(Frame object) {
383-
DockerizedService.this.logCache.handle(object);
380+
public void onNext(@NonNull Frame object) {
381+
((DockerizedServiceLogCache) DockerizedService.this.logCache).handle(object);
384382
}
385383
}
386384
}

modules/dockerized-services/impl/src/main/java/eu/cloudnetservice/modules/docker/impl/DockerizedServiceLogCache.java

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,16 @@
1717
package eu.cloudnetservice.modules.docker.impl;
1818

1919
import com.github.dockerjava.api.model.Frame;
20+
import eu.cloudnetservice.driver.service.ServiceId;
2021
import eu.cloudnetservice.node.config.Configuration;
2122
import eu.cloudnetservice.node.impl.service.defaults.log.AbstractServiceLogCache;
22-
import eu.cloudnetservice.node.service.CloudService;
23-
import eu.cloudnetservice.node.service.ServiceConsoleLogCache;
2423
import java.nio.charset.StandardCharsets;
2524
import lombok.NonNull;
2625

2726
public class DockerizedServiceLogCache extends AbstractServiceLogCache {
2827

29-
public DockerizedServiceLogCache(@NonNull Configuration configuration, @NonNull CloudService service) {
30-
super(configuration, service);
31-
}
32-
33-
@Override
34-
public @NonNull ServiceConsoleLogCache update() {
35-
return this;
28+
public DockerizedServiceLogCache(@NonNull Configuration configuration, @NonNull ServiceId associatedServiceId) {
29+
super(configuration, associatedServiceId);
3630
}
3731

3832
public void handle(@NonNull Frame frame) {
@@ -49,9 +43,7 @@ protected void handleItem(@NonNull String content, boolean comesFromErrorStream)
4943
if (content.contains("\n") || content.contains("\r")) {
5044
for (var input : content.split("\r")) {
5145
for (var text : input.split("\n")) {
52-
if (!text.trim().isEmpty()) {
53-
super.handleItem(text, comesFromErrorStream);
54-
}
46+
super.handleItem(text, comesFromErrorStream);
5547
}
5648
}
5749
}

node/api/src/main/java/eu/cloudnetservice/node/service/ServiceConsoleLogCache.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,18 +16,19 @@
1616

1717
package eu.cloudnetservice.node.service;
1818

19+
import eu.cloudnetservice.driver.service.ServiceId;
1920
import java.util.Collection;
2021
import java.util.Queue;
2122
import lombok.NonNull;
2223
import org.jetbrains.annotations.UnmodifiableView;
2324

2425
public interface ServiceConsoleLogCache {
2526

26-
@NonNull Queue<String> cachedLogMessages();
27-
28-
@NonNull ServiceConsoleLogCache update();
27+
@NonNull
28+
Queue<String> cachedLogMessages();
2929

30-
@NonNull CloudService service();
30+
@NonNull
31+
ServiceId associatedServiceId();
3132

3233
int logCacheSize();
3334

@@ -42,5 +43,6 @@ public interface ServiceConsoleLogCache {
4243
void removeHandler(@NonNull ServiceConsoleLineHandler handler);
4344

4445
@NonNull
45-
@UnmodifiableView Collection<ServiceConsoleLineHandler> handlers();
46+
@UnmodifiableView
47+
Collection<ServiceConsoleLineHandler> handlers();
4648
}

node/impl/src/main/java/eu/cloudnetservice/node/impl/service/defaults/AbstractService.java

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@
2424
import eu.cloudnetservice.driver.channel.ChannelMessageTarget;
2525
import eu.cloudnetservice.driver.document.Document;
2626
import eu.cloudnetservice.driver.event.EventManager;
27+
import eu.cloudnetservice.driver.event.events.service.CloudServiceLogEntryEvent;
2728
import eu.cloudnetservice.driver.impl.network.NetworkConstants;
2829
import eu.cloudnetservice.driver.language.I18n;
2930
import eu.cloudnetservice.driver.network.HostAndPort;
3031
import eu.cloudnetservice.driver.network.NetworkChannel;
3132
import eu.cloudnetservice.driver.network.buffer.DataBuf;
3233
import eu.cloudnetservice.driver.network.ssl.SSLConfiguration;
33-
import eu.cloudnetservice.driver.registry.Service;
3434
import eu.cloudnetservice.driver.service.ProcessSnapshot;
3535
import eu.cloudnetservice.driver.service.ServiceConfiguration;
3636
import eu.cloudnetservice.driver.service.ServiceDeployment;
@@ -105,6 +105,7 @@ public abstract class AbstractService implements InternalCloudService {
105105
protected final DefaultTickLoop mainThread;
106106
protected final EventManager eventManager;
107107
protected final Configuration configuration;
108+
protected final ServiceConsoleLogCache logCache;
108109
protected final InternalCloudServiceManager cloudServiceManager;
109110
protected final ServiceConfiguration serviceConfiguration;
110111
protected final ServiceVersionProvider serviceVersionProvider;
@@ -121,25 +122,25 @@ public abstract class AbstractService implements InternalCloudService {
121122
protected final Collection<ServiceRemoteInclusion> installedInclusions = ConcurrentHashMap.newKeySet();
122123
protected final Collection<ServiceDeployment> installedDeployments = ConcurrentHashMap.newKeySet();
123124

124-
protected ServiceConsoleLogCache logCache;
125-
126125
protected volatile NetworkChannel networkChannel;
127126
protected volatile long connectionTimestamp = -1;
128127

129128
protected volatile ServiceInfoSnapshot lastServiceInfo;
130129
protected volatile ServiceInfoSnapshot currentServiceInfo;
131130

132131
protected AbstractService(
133-
@NonNull @Service I18n i18n,
132+
@NonNull I18n i18n,
134133
@NonNull DefaultTickLoop tickLoop,
135134
@NonNull Configuration nodeConfig,
136135
@NonNull ServiceConfiguration configuration,
137136
@NonNull InternalCloudServiceManager manager,
138137
@NonNull EventManager eventManager,
138+
@NonNull ServiceConsoleLogCache logCache,
139139
@NonNull ServiceVersionProvider versionProvider,
140140
@NonNull ServiceConfigurationPreparer serviceConfigurationPreparer
141141
) {
142142
this.i18n = i18n;
143+
this.logCache = logCache;
143144
this.mainThread = tickLoop;
144145
this.configuration = nodeConfig;
145146
this.eventManager = eventManager;
@@ -162,6 +163,7 @@ protected AbstractService(
162163
ServiceLifeCycle.PREPARED,
163164
configuration.propertyHolder().immutableCopy());
164165
this.pushServiceInfoSnapshotUpdate(ServiceLifeCycle.PREPARED, false);
166+
this.initStandardServiceLogHandler();
165167

166168
// register the service locally for now
167169
manager.registerUnacceptedService(this);
@@ -884,6 +886,32 @@ protected void downloadInclusionFile(@NonNull ServiceRemoteInclusion inclusion,
884886
this.serviceId().nodeUniqueId()};
885887
}
886888

889+
protected void initStandardServiceLogHandler() {
890+
this.logCache.addHandler((_, line, stderr) -> {
891+
for (var logTarget : this.logTargets) {
892+
if (logTarget._1().equals(ChannelMessageSender.self().toTarget())) {
893+
// the current target is the node this service is running on, print it directly here
894+
this.eventManager.callEvent(logTarget._2(), new CloudServiceLogEntryEvent(this.currentServiceInfo,
895+
line,
896+
stderr ? CloudServiceLogEntryEvent.StreamType.STDERR : CloudServiceLogEntryEvent.StreamType.STDOUT));
897+
} else {
898+
// the listener is listening remotely, send the line to the network component
899+
ChannelMessage.builder()
900+
.target(logTarget._1())
901+
.channel(NetworkConstants.INTERNAL_MSG_CHANNEL)
902+
.message("screen_new_line")
903+
.buffer(DataBuf.empty()
904+
.writeObject(this.currentServiceInfo)
905+
.writeString(logTarget._2())
906+
.writeString(line)
907+
.writeBoolean(stderr))
908+
.build()
909+
.send();
910+
}
911+
}
912+
});
913+
}
914+
887915
protected abstract void startProcess();
888916

889917
protected abstract void stopProcess();

node/impl/src/main/java/eu/cloudnetservice/node/impl/service/defaults/DefaultCloudServiceManager.java

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -170,20 +170,14 @@ public DefaultCloudServiceManager(
170170
})
171171
.currentGetter(group -> this.serviceProviderByName(group.name()).serviceInfo())
172172
.build());
173-
// schedule the updating of the local service log cache
173+
174+
// schedule the service watchdog to run once per second
174175
mainThread.scheduleTask(() -> {
175176
for (var service : this.localCloudServices()) {
176-
// we only need to look at running services
177-
if (service.lifeCycle() == ServiceLifeCycle.RUNNING) {
178-
// detect dead services and stop them
179-
if (service.alive()) {
180-
service.serviceConsoleLogCache().update();
181-
LOGGER.trace("Updated service log cache of {}", service.serviceId().name());
182-
} else {
183-
eventManager.callEvent(new CloudServicePreForceStopEvent(service));
184-
service.stop();
185-
LOGGER.trace("Stopped dead service {}", service.serviceId().name());
186-
}
177+
if (service.lifeCycle() == ServiceLifeCycle.RUNNING && !service.alive()) {
178+
eventManager.callEvent(new CloudServicePreForceStopEvent(service));
179+
service.stop();
180+
LOGGER.debug("Stopped dead service {}", service.serviceId().name());
187181
}
188182
}
189183
return null;

0 commit comments

Comments
 (0)