Skip to content

Commit c25ec87

Browse files
committed
Add metrics for scheduling and pulling
1 parent cdc4368 commit c25ec87

4 files changed

Lines changed: 87 additions & 0 deletions

File tree

src/main/java/eu/openanalytics/containerproxy/backend/kubernetes/KubernetesBackend.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,13 @@
4444
import io.fabric8.kubernetes.api.model.ContainerPortBuilder;
4545
import io.fabric8.kubernetes.api.model.EnvVar;
4646
import io.fabric8.kubernetes.api.model.EnvVarSourceBuilder;
47+
import io.fabric8.kubernetes.api.model.Event;
4748
import io.fabric8.kubernetes.api.model.GenericKubernetesResource;
4849
import io.fabric8.kubernetes.api.model.GenericKubernetesResourceList;
4950
import io.fabric8.kubernetes.api.model.HasMetadata;
5051
import io.fabric8.kubernetes.api.model.LocalObjectReference;
5152
import io.fabric8.kubernetes.api.model.ObjectMetaBuilder;
53+
import io.fabric8.kubernetes.api.model.ObjectReferenceBuilder;
5254
import io.fabric8.kubernetes.api.model.Pod;
5355
import io.fabric8.kubernetes.api.model.PodBuilder;
5456
import io.fabric8.kubernetes.api.model.ServiceBuilder;
@@ -87,6 +89,9 @@
8789
import java.nio.file.Files;
8890
import java.nio.file.Path;
8991
import java.nio.file.Paths;
92+
import java.time.LocalDateTime;
93+
import java.time.OffsetDateTime;
94+
import java.time.ZoneId;
9095
import java.util.ArrayList;
9196
import java.util.Arrays;
9297
import java.util.Collections;
@@ -312,6 +317,39 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
312317
proxyStatusService.containerStarted(proxy, container);
313318
Pod pod = kubeClient.resource(startedPod).fromServer().get();
314319

320+
// TODO check k8s compatibility
321+
List<Event> events = kubeClient.v1().events().withInvolvedObject(new ObjectReferenceBuilder()
322+
.withKind("Pod")
323+
.withName(pod.getMetadata().getName())
324+
.withNamespace(pod.getMetadata().getNamespace())
325+
.build()).list().getItems();
326+
327+
LocalDateTime pullingTime = null;
328+
LocalDateTime pulledTime = null;
329+
LocalDateTime scheduledTime = null;
330+
331+
for (Event event : events) {
332+
if (event.getCount() != null && event.getCount() > 1) {
333+
// ignore events which happened multiple times as we are unable to properly process them
334+
continue;
335+
}
336+
if (event.getReason().equalsIgnoreCase("Pulling")) {
337+
pullingTime = OffsetDateTime.parse(event.getLastTimestamp()).atZoneSameInstant(ZoneId.systemDefault()).toLocalDateTime();
338+
} else if (event.getReason().equalsIgnoreCase("Pulled")) {
339+
pulledTime = OffsetDateTime.parse(event.getLastTimestamp()).atZoneSameInstant(ZoneId.systemDefault()).toLocalDateTime();
340+
} else if (event.getReason().equalsIgnoreCase("Scheduled")) {
341+
scheduledTime = OffsetDateTime.parse(event.getEventTime().getTime()).atZoneSameInstant(ZoneId.systemDefault()).toLocalDateTime();
342+
}
343+
}
344+
345+
if (pullingTime != null && pulledTime != null) {
346+
proxyStatusService.imagePulled(proxy, container, pullingTime, pulledTime);
347+
}
348+
349+
if (scheduledTime != null) {
350+
proxyStatusService.containerScheduled(proxy, container, scheduledTime);
351+
}
352+
315353
Service service = null;
316354
if (isUseInternalNetwork()) {
317355
// If SP runs inside the cluster, it can access pods directly and doesn't need any port publishing service.

src/main/java/eu/openanalytics/containerproxy/model/runtime/ProxyStartupLog.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,14 @@ public Map<Integer, StartupStep> getStartApplication() {
4747
return startApplication;
4848
}
4949

50+
public Map<Integer, StartupStep> getPullImage() {
51+
return pullImage;
52+
}
53+
54+
public Map<Integer, StartupStep> getScheduleContainer() {
55+
return scheduleContainer;
56+
}
57+
5058
public static class StartupStep {
5159

5260
private LocalDateTime startTime = null;
@@ -69,6 +77,15 @@ public void stepSucceeded() {
6977
state = StartupStepState.SUCCESS;
7078
}
7179

80+
public void stepSucceeded(LocalDateTime startTime, LocalDateTime endTime) {
81+
if (state != StartupStepState.NOT_EXECUTED || startTime == null || endTime == null) {
82+
throw new IllegalStateException("Cannot start step if it's already started!");
83+
}
84+
this.startTime = startTime;
85+
this.endTime = endTime;
86+
state = StartupStepState.SUCCESS;
87+
}
88+
7289
public void stepFailed() {
7390
if (state != StartupStepState.STARTED) {
7491
throw new IllegalStateException("Cannot finish (with failure) step if it is not yet started or already completed");

src/main/java/eu/openanalytics/containerproxy/service/ProxyStatusService.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import eu.openanalytics.containerproxy.model.runtime.ProxyStartupLog;
66
import org.springframework.stereotype.Service;
77

8+
import java.time.LocalDateTime;
89
import java.util.Map;
910
import java.util.concurrent.ConcurrentHashMap;
1011

@@ -29,6 +30,22 @@ public void containerStarting(Proxy proxy, Container container) {
2930
startupLog.get(proxy.getId()).getStartContainer(container.getIndex()).stepStarted();
3031
}
3132

33+
/**
34+
* Step 2.1: schedule container
35+
*/
36+
public void containerScheduled(Proxy proxy, Container container, LocalDateTime scheduledTime) {
37+
startupLog.get(proxy.getId()).getScheduleContainer(container.getIndex()).stepSucceeded(
38+
startupLog.get(proxy.getId()).getStartContainer(container.getIndex()).getStartTime(),
39+
scheduledTime);
40+
}
41+
42+
/**
43+
* Step 2.2: pull image
44+
*/
45+
public void imagePulled(Proxy proxy, Container container, LocalDateTime pullingTime, LocalDateTime pulledTime) {
46+
startupLog.get(proxy.getId()).getPullImage(container.getIndex()).stepSucceeded(pullingTime, pulledTime);
47+
}
48+
3249
/**
3350
* Step 3: container has been started and application is starting up
3451
*/

src/main/java/eu/openanalytics/containerproxy/stat/impl/Micrometer.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ public void init() {
100100
registry.gauge("absolute_apps_running", Tags.of("spec.id", spec.getId()), proxyCounter, wrapHandleNull(ProxyCounter::getProxyCount));
101101
registry.timer("startupTime", "spec.id", spec.getId());
102102
for (ContainerSpec containerSpec : spec.getContainerSpecs()) {
103+
registry.timer("imagePullTime", "spec.id", spec.getId(), "container.idx", containerSpec.getIndex().toString());
104+
registry.timer("containerScheduleTime", "spec.id", spec.getId(), "container.idx", containerSpec.getIndex().toString());
103105
registry.timer("containerStartupTime", "spec.id", spec.getId(), "container.idx", containerSpec.getIndex().toString());
104106
registry.timer("applicationStartupTime", "spec.id", spec.getId(), "container.idx", containerSpec.getIndex().toString());
105107
}
@@ -137,6 +139,18 @@ public void onProxyStartEvent(ProxyStartEvent event) {
137139
registry.timer("startupTime", "spec.id", event.getSpecId()).record(d);
138140
});
139141

142+
startupLog.getPullImage().forEach((idx, step) -> {
143+
step.getStepDuration().ifPresent((d) -> {
144+
registry.timer("imagePullTime", "spec.id", event.getSpecId(), "container.idx", idx.toString()).record(d);
145+
});
146+
});
147+
148+
startupLog.getScheduleContainer().forEach((idx, step) -> {
149+
step.getStepDuration().ifPresent((d) -> {
150+
registry.timer("containerScheduleTime", "spec.id", event.getSpecId(), "container.idx", idx.toString()).record(d);
151+
});
152+
});
153+
140154
startupLog.getStartContainer().forEach((idx, step) -> {
141155
step.getStepDuration().ifPresent((d) -> {
142156
registry.timer("containerStartupTime", "spec.id", event.getSpecId(), "container.idx", idx.toString()).record(d);
@@ -149,6 +163,7 @@ public void onProxyStartEvent(ProxyStartEvent event) {
149163
});
150164
});
151165

166+
152167
}
153168

154169
@EventListener

0 commit comments

Comments
 (0)