Skip to content

Commit e8a3d0c

Browse files
committed
Merge pull request '#27090: Add more timings to Prometheus statistics' (#65) from feature/27090/prom_timings into develop
2 parents 4c6c497 + a0689f3 commit e8a3d0c

16 files changed

Lines changed: 654 additions & 71 deletions

File tree

src/main/java/eu/openanalytics/containerproxy/backend/AbstractContainerBackend.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import eu.openanalytics.containerproxy.model.spec.ContainerSpec;
4040
import eu.openanalytics.containerproxy.service.AppRecoveryService;
4141
import eu.openanalytics.containerproxy.service.IdentifierService;
42+
import eu.openanalytics.containerproxy.service.ProxyStatusService;
4243
import eu.openanalytics.containerproxy.service.UserService;
4344
import eu.openanalytics.containerproxy.spec.expression.ExpressionAwareContainerSpec;
4445
import eu.openanalytics.containerproxy.spec.expression.SpecExpressionResolver;
@@ -99,6 +100,11 @@ public abstract class AbstractContainerBackend implements IContainerBackend {
99100
@Inject
100101
protected IdentifierService identifierService;
101102

103+
@Inject
104+
@Lazy
105+
// Note: lazy needed to work around early initialization conflict
106+
protected ProxyStatusService proxyStatusService;
107+
102108
@Inject
103109
@Lazy
104110
// Note: lazy to prevent cyclic dependencies
@@ -128,11 +134,13 @@ public SuccessOrFailure<Proxy> startProxy(Proxy proxy) throws ContainerProxyExce
128134

129135
if (!testStrategy.testProxy(proxy)) {
130136
stopProxy(proxy);
137+
proxyStatusService.applicationStartupFailed(proxy);
131138
return SuccessOrFailure.createFailure(proxy, "Container did not respond in time");
132139
}
133140

134141
proxy.setStartupTimestamp(System.currentTimeMillis());
135142
proxy.setStatus(ProxyStatus.Up);
143+
proxyStatusService.proxyStarted(proxy);
136144

137145
return SuccessOrFailure.createSuccess(proxy);
138146
} catch (Throwable t) {

src/main/java/eu/openanalytics/containerproxy/backend/docker/DockerEngineBackend.java

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,18 @@ public void initialize() {
6767

6868
@Override
6969
protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Exception {
70+
Container container = new Container();
71+
container.setSpec(spec);
72+
container.setIndex(spec.getIndex());
73+
7074
Builder hostConfigBuilder = HostConfig.builder();
7175

7276
if (imagePullPolicy == ImagePullPolicy.Always
7377
|| (imagePullPolicy == ImagePullPolicy.IfNotPresent && !isImagePresent(spec))) {
7478
logger.info("Pulling image {}", spec.getImage());
79+
proxyStatusService.imagePulling(proxy, container);
7580
pullImage(spec);
81+
proxyStatusService.imagePulled(proxy, container);
7682
}
7783

7884
Map<String, List<PortBinding>> portBindings = new HashMap<>();
@@ -118,20 +124,26 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
118124
.cmd(spec.getCmd())
119125
.env(convertEnv(buildEnv(spec, proxy)))
120126
.build();
121-
ContainerCreation containerCreation = dockerClient.createContainer(containerConfig);
122-
123-
if (spec.getNetworkConnections() != null) {
124-
for (String networkConnection: spec.getNetworkConnections()) {
125-
dockerClient.connectToNetwork(containerCreation.id(), networkConnection);
127+
128+
try {
129+
// tell the status service we are starting the container
130+
proxyStatusService.containerStarting(proxy, container);
131+
ContainerCreation containerCreation = dockerClient.createContainer(containerConfig);
132+
container.setId(containerCreation.id());
133+
134+
if (spec.getNetworkConnections() != null) {
135+
for (String networkConnection: spec.getNetworkConnections()) {
136+
dockerClient.connectToNetwork(containerCreation.id(), networkConnection);
137+
}
126138
}
139+
140+
dockerClient.startContainer(containerCreation.id());
141+
proxyStatusService.containerStarted(proxy, container);
142+
} catch (DockerException ex) {
143+
proxyStatusService.containerStartFailed(proxy, container);
144+
throw ex;
127145
}
128-
129-
dockerClient.startContainer(containerCreation.id());
130-
131-
Container container = new Container();
132-
container.setSpec(spec);
133-
container.setId(containerCreation.id());
134-
146+
135147
// Calculate proxy routes for all configured ports.
136148
for (String mappingKey: spec.getPortMapping().keySet()) {
137149
int containerPort = spec.getPortMapping().get(mappingKey);

src/main/java/eu/openanalytics/containerproxy/backend/docker/DockerSwarmBackend.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ public void initialize() throws ContainerProxyException {
7878
protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Exception {
7979
Container container = new Container();
8080
container.setSpec(spec);
81+
container.setIndex(spec.getIndex());
8182

8283
Mount[] mounts = null;
8384
if (spec.getVolumes() != null) mounts = Arrays.stream(spec.getVolumes())
@@ -176,10 +177,12 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
176177
serviceId = dockerClient.createService(serviceSpecBuilder.build()).id();
177178
}
178179

180+
// tell the status service we are starting the container
181+
proxyStatusService.containerStarting(proxy, container);
179182
container.getParameters().put(PARAM_SERVICE_ID, serviceId);
180183

181184
// Give the service some time to start up and launch a container.
182-
boolean containerFound = Retrying.retry(i -> {
185+
boolean containerFound = Retrying.retry((i, maxAttempts) -> {
183186
try {
184187
Task serviceTask = dockerClient
185188
.listTasks(Task.Criteria.builder().serviceName(serviceName).build())
@@ -191,12 +194,14 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
191194
throw new RuntimeException("Failed to inspect swarm service tasks", e);
192195
}
193196
return (container.getId() != null);
194-
}, 30, 2000, true);
197+
}, 60_000, true);
195198

196199
if (!containerFound) {
197200
dockerClient.removeService(serviceId);
201+
proxyStatusService.containerStartFailed(proxy, container);
198202
throw new IllegalStateException("Swarm container did not start in time");
199203
}
204+
proxyStatusService.containerStarted(proxy, container);
200205

201206
// Calculate proxy routes for all configured ports.
202207
for (String mappingKey: spec.getPortMapping().keySet()) {

src/main/java/eu/openanalytics/containerproxy/backend/kubernetes/KubernetesBackend.java

Lines changed: 93 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,13 @@
4444
import io.fabric8.kubernetes.api.model.ContainerPortBuilder;
4545
import io.fabric8.kubernetes.api.model.EnvVar;
4646
import io.fabric8.kubernetes.api.model.EnvVarSourceBuilder;
47+
import io.fabric8.kubernetes.api.model.Event;
4748
import io.fabric8.kubernetes.api.model.GenericKubernetesResource;
4849
import io.fabric8.kubernetes.api.model.GenericKubernetesResourceList;
4950
import io.fabric8.kubernetes.api.model.HasMetadata;
5051
import io.fabric8.kubernetes.api.model.LocalObjectReference;
5152
import io.fabric8.kubernetes.api.model.ObjectMetaBuilder;
53+
import io.fabric8.kubernetes.api.model.ObjectReferenceBuilder;
5254
import io.fabric8.kubernetes.api.model.Pod;
5355
import io.fabric8.kubernetes.api.model.PodBuilder;
5456
import io.fabric8.kubernetes.api.model.ServiceBuilder;
@@ -68,6 +70,7 @@
6870
import io.fabric8.kubernetes.client.ConfigBuilder;
6971
import io.fabric8.kubernetes.client.DefaultKubernetesClient;
7072
import io.fabric8.kubernetes.client.KubernetesClient;
73+
import io.fabric8.kubernetes.client.KubernetesClientException;
7174
import io.fabric8.kubernetes.client.dsl.LogWatch;
7275
import io.fabric8.kubernetes.client.dsl.NonNamespaceOperation;
7376
import io.fabric8.kubernetes.client.dsl.Resource;
@@ -87,6 +90,9 @@
8790
import java.nio.file.Files;
8891
import java.nio.file.Path;
8992
import java.nio.file.Paths;
93+
import java.time.LocalDateTime;
94+
import java.time.OffsetDateTime;
95+
import java.time.ZoneId;
9096
import java.util.ArrayList;
9197
import java.util.Arrays;
9298
import java.util.Collections;
@@ -159,6 +165,7 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
159165
Container container = new Container();
160166
container.setSpec(spec);
161167
container.setId(UUID.randomUUID().toString());
168+
container.setIndex(spec.getIndex());
162169

163170
String kubeNamespace = getProperty(PROPERTY_NAMESPACE, DEFAULT_NAMESPACE);
164171
String apiVersion = getProperty(PROPERTY_API_VERSION, DEFAULT_API_VERSION);
@@ -281,29 +288,41 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
281288
final String effectiveKubeNamespace = patchedPod.getMetadata().getNamespace(); // use the namespace of the patched Pod, in case the patch changes the namespace.
282289
container.getParameters().put(PARAM_NAMESPACE, effectiveKubeNamespace);
283290

284-
// create additional manifests -> use the effective (i.e. patched) namespace if no namespace is provided
291+
// create additional manifests -> use the effective f(i.e. patched) namespace if no namespace is provided
285292
createAdditionalManifests(proxy, effectiveKubeNamespace);
286293

294+
// tell the status service we are starting the pod/container
295+
proxyStatusService.containerStarting(proxy, container);
296+
297+
// create and start the pod
287298
Pod startedPod = kubeClient.pods().inNamespace(effectiveKubeNamespace).create(patchedPod);
288299

289300
int totalWaitMs = Integer.parseInt(environment.getProperty("proxy.kubernetes.pod-wait-time", "60000"));
290-
int maxTries = totalWaitMs / 1000;
291-
Retrying.retry(i -> {
292-
if (!Readiness.getInstance().isReady(kubeClient.resource(startedPod).fromServer().get())) {
293-
if (i > 1 && log != null) log.debug(String.format("Container not ready yet, trying again (%d/%d)", i, maxTries));
294-
return false;
295-
}
296-
return true;
301+
boolean podReady = Retrying.retry((currentAttempt, maxAttempts) -> {
302+
if (!Readiness.getInstance().isReady(kubeClient.resource(startedPod).fromServer().get())) {
303+
if (currentAttempt > 10 && log != null) log.info(String.format("Container not ready yet, trying again (%d/%d)", currentAttempt, maxAttempts));
304+
return false;
305+
}
306+
return true;
307+
}, totalWaitMs);
308+
309+
if (!podReady) {
310+
// check a final time whether the pod is ready
311+
if (!Readiness.getInstance().isReady(kubeClient.resource(startedPod).fromServer().get())) {
312+
Pod pod = kubeClient.resource(startedPod).fromServer().get();
313+
container.getParameters().put(PARAM_POD, pod);
314+
proxy.getContainers().add(container);
315+
316+
proxyStatusService.containerStartFailed(proxy, container);
317+
throw new ContainerProxyException("Container did not become ready in time");
297318
}
298-
, maxTries, 1000);
299-
if (!Readiness.getInstance().isReady(kubeClient.resource(startedPod).fromServer().get())) {
300-
Pod pod = kubeClient.resource(startedPod).fromServer().get();
301-
container.getParameters().put(PARAM_POD, pod);
302-
proxy.getContainers().add(container);
303-
throw new ContainerProxyException("Container did not become ready in time");
304319
}
320+
321+
proxyStatusService.containerStarted(proxy, container);
305322
Pod pod = kubeClient.resource(startedPod).fromServer().get();
306323

324+
parseKubernetesEvents(proxy, container, pod);
325+
307326
Service service = null;
308327
if (isUseInternalNetwork()) {
309328
// If SP runs inside the cluster, it can access pods directly and doesn't need any port publishing service.
@@ -328,7 +347,7 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
328347
.build());
329348

330349
// Workaround: waitUntilReady appears to be buggy.
331-
Retrying.retry(i -> isServiceReady(kubeClient.resource(startupService).fromServer().get()), 60, 1000);
350+
Retrying.retry((currentAttempt, maxAttempts) -> isServiceReady(kubeClient.resource(startupService).fromServer().get()), 60_000);
332351

333352
service = kubeClient.resource(startupService).fromServer().get();
334353
}
@@ -354,6 +373,65 @@ protected Container startContainer(ContainerSpec spec, Proxy proxy) throws Excep
354373
return container;
355374
}
356375

376+
private LocalDateTime getEventTime(Event event) {
377+
if (event.getEventTime() != null && event.getEventTime().getTime() != null) {
378+
return OffsetDateTime.parse(event.getEventTime().getTime()).atZoneSameInstant(ZoneId.systemDefault()).toLocalDateTime();
379+
}
380+
381+
if (event.getFirstTimestamp() != null) {
382+
return OffsetDateTime.parse(event.getFirstTimestamp()).atZoneSameInstant(ZoneId.systemDefault()).toLocalDateTime();
383+
}
384+
385+
if (event.getLastTimestamp() != null) {
386+
return OffsetDateTime.parse(event.getLastTimestamp()).atZoneSameInstant(ZoneId.systemDefault()).toLocalDateTime();
387+
}
388+
389+
return null;
390+
}
391+
392+
private void parseKubernetesEvents(Proxy proxy, Container container, Pod pod) {
393+
List<Event> events;
394+
try {
395+
events = kubeClient.v1().events().withInvolvedObject(new ObjectReferenceBuilder()
396+
.withKind("Pod")
397+
.withName(pod.getMetadata().getName())
398+
.withNamespace(pod.getMetadata().getNamespace())
399+
.build()).list().getItems();
400+
} catch (KubernetesClientException ex) {
401+
if (ex.getCode() == 403) {
402+
log.warn("Cannot parse events of pod because of insufficient permissions. If fine-grained statistics are desired, give the ShinyProxy ServiceAccount permission to events of pods.");
403+
return;
404+
}
405+
throw ex;
406+
}
407+
408+
LocalDateTime pullingTime = null;
409+
LocalDateTime pulledTime = null;
410+
LocalDateTime scheduledTime = null;
411+
412+
for (Event event : events) {
413+
if (event.getCount() != null && event.getCount() > 1) {
414+
// ignore events which happened multiple time as we are unable to properly process them
415+
continue;
416+
}
417+
if (event.getReason().equalsIgnoreCase("Pulling")) {
418+
pullingTime = getEventTime(event);
419+
} else if (event.getReason().equalsIgnoreCase("Pulled")) {
420+
pulledTime = getEventTime(event);
421+
} else if (event.getReason().equalsIgnoreCase("Scheduled")) {
422+
scheduledTime = getEventTime(event);
423+
}
424+
}
425+
426+
if (pullingTime != null && pulledTime != null) {
427+
proxyStatusService.imagePulled(proxy, container, pullingTime, pulledTime);
428+
}
429+
430+
if (scheduledTime != null) {
431+
proxyStatusService.containerScheduled(proxy, container, scheduledTime);
432+
}
433+
}
434+
357435
private JsonPatch readPatchFromSpec(ContainerSpec containerSpec, Proxy proxy) throws JsonMappingException, JsonProcessingException {
358436
String patchAsString = proxy.getSpec().getKubernetesPodPatch();
359437
if (patchAsString == null) {

src/main/java/eu/openanalytics/containerproxy/event/ProxyStartEvent.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@
2626

2727
public class ProxyStartEvent extends ApplicationEvent {
2828

29+
private final String proxyId;
2930
private final String userId;
3031
private final String specId;
31-
private final Duration startupTime;
3232

33-
public ProxyStartEvent(Object source, String userId, String specId, Duration startupTime) {
33+
public ProxyStartEvent(Object source, String proxyId, String userId, String specId) {
3434
super(source);
35+
this.proxyId = proxyId;
3536
this.userId = userId;
3637
this.specId = specId;
37-
this.startupTime = startupTime;
3838
}
3939

4040
public String getUserId() {
@@ -45,7 +45,7 @@ public String getSpecId() {
4545
return specId;
4646
}
4747

48-
public Duration getStartupTime() {
49-
return startupTime;
48+
public String getProxyId() {
49+
return proxyId;
5050
}
5151
}

src/main/java/eu/openanalytics/containerproxy/model/runtime/Container.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929

3030
public class Container {
3131

32+
/**
33+
* Index in the array of ContainerSpecs of the ProxySpec.
34+
*/
35+
private Integer index;
3236
private String id;
3337
private ContainerSpec spec;
3438

@@ -37,12 +41,15 @@ public class Container {
3741
public String getId() {
3842
return id;
3943
}
44+
4045
public void setId(String id) {
4146
this.id = id;
4247
}
48+
4349
public ContainerSpec getSpec() {
4450
return spec;
4551
}
52+
4653
public void setSpec(ContainerSpec spec) {
4754
this.spec = spec;
4855
}
@@ -51,8 +58,16 @@ public void setSpec(ContainerSpec spec) {
5158
public Map<String, Object> getParameters() {
5259
return parameters;
5360
}
61+
5462
public void setParameters(Map<String, Object> parameters) {
5563
this.parameters = parameters;
5664
}
5765

66+
public Integer getIndex() {
67+
return index;
68+
}
69+
70+
public void setIndex(Integer index) {
71+
this.index = index;
72+
}
5873
}

0 commit comments

Comments
 (0)