Skip to content

Commit 2ed0fc2

Browse files
committed
Fix #34197: refactor logs for failed apps
1 parent 099f6bd commit 2ed0fc2

2 files changed

Lines changed: 36 additions & 8 deletions

File tree

src/main/java/eu/openanalytics/containerproxy/backend/docker/DockerSwarmBackend.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
import eu.openanalytics.containerproxy.ContainerFailedToStartException;
2424
import eu.openanalytics.containerproxy.ContainerProxyException;
25+
import eu.openanalytics.containerproxy.event.NewProxyEvent;
2526
import eu.openanalytics.containerproxy.model.runtime.Container;
2627
import eu.openanalytics.containerproxy.model.runtime.ExistingContainerInfo;
2728
import eu.openanalytics.containerproxy.model.runtime.PortMappings;
@@ -213,6 +214,7 @@ public Proxy startContainer(Authentication user, Container initialContainer, Con
213214
// tell the status service we are starting the container
214215
proxyStartupLogBuilder.startingContainer(initialContainer.getIndex());
215216
rContainerBuilder.addRuntimeValue(new RuntimeValue(BackendContainerNameKey.inst, new BackendContainerName(serviceName)), false);
217+
applicationEventPublisher.publishEvent(new NewProxyEvent(proxy.toBuilder().updateContainer(rContainerBuilder.build()).build(), user));
216218

217219
// Give the service some time to start up and launch a container.
218220
boolean containerFound = Retrying.retry((currentAttempt, maxAttempts) -> {

src/main/java/eu/openanalytics/containerproxy/stat/impl/Micrometer.java

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public class Micrometer implements IStatCollector {
8888

8989
private Counter userLogouts;
9090

91-
private Cache<String, String> recentlyStoppedProxies;
91+
private Cache<String, String> recentProxies;
9292

9393
private static final Map<ProxyStatus, Integer> PROXY_STATUS_TO_INTEGER = Map.of(
9494
ProxyStatus.New, 1,
@@ -119,7 +119,7 @@ private static <T> ToDoubleFunction<T> wrapHandleNull(ToIntegerFunction<T> produ
119119

120120
@PostConstruct
121121
public void init() {
122-
recentlyStoppedProxies = Caffeine.newBuilder()
122+
recentProxies = Caffeine.newBuilder()
123123
.scheduler(Scheduler.systemScheduler())
124124
.expireAfterWrite(2, TimeUnit.MINUTES)
125125
.build();
@@ -183,6 +183,7 @@ public void onNewProxyEvent(NewProxyEvent event) {
183183
"proxy.namespace", event.getBackendContainerName().getNamespace()),
184184
PROXY_STATUS_TO_INTEGER.get(ProxyStatus.New)
185185
);
186+
recentProxies.put(event.getProxyId(), event.getProxyId());
186187
}
187188
logger.debug("NewProxyEvent [user: {}]", event.getUserId());
188189
}
@@ -203,6 +204,7 @@ public void onProxyStartEvent(ProxyStartEvent event) {
203204
"proxy.namespace", event.getBackendContainerName().getNamespace()),
204205
PROXY_STATUS_TO_INTEGER.get(ProxyStatus.Up)
205206
);
207+
recentProxies.put(event.getProxyId(), event.getProxyId());
206208
}
207209
if (!event.isLocalEvent()) {
208210
return;
@@ -241,7 +243,7 @@ public void onProxyStartEvent(ProxyStartEvent event) {
241243

242244
@EventListener
243245
public void onProxyStopEvent(ProxyStopEvent event) {
244-
recentlyStoppedProxies.put(event.getProxyId(), event.getProxyId());
246+
recentProxies.put(event.getProxyId(), event.getProxyId());
245247
// must run on each instance (gauge is registered on every instance)
246248
removeExistingAppInfo(event.getProxyId());
247249
Integer value = PROXY_STATUS_TO_INTEGER.get(ProxyStatus.Stopped);
@@ -274,7 +276,7 @@ public void onProxyStopEvent(ProxyStopEvent event) {
274276

275277
@EventListener
276278
public void onProxyStartFailedEvent(ProxyStartFailedEvent event) {
277-
recentlyStoppedProxies.put(event.getProxyId(), event.getProxyId());
279+
recentProxies.put(event.getProxyId(), event.getProxyId());
278280
// must run on each instance (gauge is registered on every instance)
279281
String resourceId;
280282
String namespace;
@@ -385,10 +387,17 @@ public Integer getProxyCount() {
385387
private void updateAppInfo() {
386388
Map<String, Gauge> existingGauges = getAppInfoGauges();
387389
for (Proxy proxy : proxyService.getAllProxies()) {
388-
if (existingGauges.remove(proxy.getId()) != null) {
389-
// gauge already exists, no need to re-create
390+
Gauge existingGauge = existingGauges.remove(proxy.getId());
391+
if (existingGauge != null && (existingGauge.value() == PROXY_STATUS_TO_INTEGER.get(proxy.getStatus())
392+
|| existingGauge.value() == PROXY_STATUS_CRASHED_TO_INTEGER
393+
|| existingGauge.value() == PROXY_STATUS_FAILED_TO_START_TO_INTEGER)) {
394+
// gauge already exists and value is correct
390395
continue;
391396
}
397+
if (existingGauge != null) {
398+
registry.remove(existingGauge);
399+
}
400+
392401
BackendContainerName backendContainerName = getBackendContainerName(proxy);
393402
if (backendContainerName == null) {
394403
// container not fully ready, will be registered later
@@ -403,14 +412,31 @@ private void updateAppInfo() {
403412
"proxy.created.timestamp", Long.toString(proxy.getCreatedTimestamp()),
404413
"resource.id", backendContainerName.getName(),
405414
"proxy.namespace", backendContainerName.getNamespace()),
406-
1
415+
PROXY_STATUS_TO_INTEGER.get(proxy.getStatus())
407416
);
408417
}
409418
for (Gauge gauge : existingGauges.values()) {
410419
String proxyId = gauge.getId().getTag("proxy.id");
411-
if (proxyId != null && recentlyStoppedProxies.getIfPresent(proxyId) != null) {
420+
if (proxyId != null && recentProxies.getIfPresent(proxyId) != null) {
412421
// this proxy was recently stopped, we should not yet remove the gauge
413422
// so that the metrics system knows the app was stopped
423+
if (gauge.value() != PROXY_STATUS_TO_INTEGER.get(ProxyStatus.Stopped)
424+
&& gauge.value() != PROXY_STATUS_CRASHED_TO_INTEGER
425+
&& gauge.value() != PROXY_STATUS_FAILED_TO_START_TO_INTEGER) {
426+
// gauge not yet updated -> set it as stopped
427+
registry.remove(gauge);
428+
registry.gauge("appInfo",
429+
Tags.of(
430+
"spec.id", gauge.getId().getTag("spec.id"),
431+
"user.id", gauge.getId().getTag("user.id"),
432+
"proxy.instance", gauge.getId().getTag("proxy.instance"),
433+
"proxy.id", gauge.getId().getTag("proxy.id"),
434+
"proxy.created.timestamp", gauge.getId().getTag("proxy.created.timestamp"),
435+
"resource.id", gauge.getId().getTag("resource.id"),
436+
"proxy.namespace", gauge.getId().getTag("proxy.namespace")),
437+
PROXY_STATUS_TO_INTEGER.get(ProxyStatus.Stopped)
438+
);
439+
}
414440
continue;
415441
}
416442
// the proxy of this gauge no longer exists -> remove the gauge

0 commit comments

Comments
 (0)