Skip to content

Commit 03096f4

Browse files
committed
Execution Hanging Detection
Creating a new system to detect Job hanging and finalize the execution. Signed-off-by: Rodrigo Nardi <rnardi@netdef.org>
1 parent f343aca commit 03096f4

5 files changed

Lines changed: 51 additions & 2 deletions

File tree

lib/github/plan_execution/finished.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ class Finished
2020
# Prepares the handler for a finished plan execution.
#
# @param payload [Hash] request data. 'bamboo_ref' is used to look up the
#   CheckSuite by its bamboo_ci_ref; the optional 'hanged' flag is stored in
#   @hanged and defaults to false when absent.
def initialize(payload)
  @check_suite = CheckSuite.find_by(bamboo_ci_ref: payload['bamboo_ref'])
  @logger = GithubLogger.instance.create('github_plan_execution_finished.log', Logger::INFO)
  # Accept the flag under either a String or a Symbol key: payloads built by
  # hand in Ruby code frequently use Symbol keys, and reading only the String
  # key would silently drop the flag for those callers.
  @hanged = payload['hanged'] || payload[:hanged] || false
end
2425

2526
def finished

lib/github/update_status.rb

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def update_status
6060
case @status
6161
when 'in_progress'
6262
@job.in_progress(@github_check)
63+
create_timeout_worker
6364
when 'success'
6465
@job.success(@github_check)
6566
@job.update_execution_time
@@ -81,6 +82,16 @@ def update_status
8182
[500, 'Internal Server Error']
8283
end
8384

85+
# Drops any queued TimeoutExecution job whose handler matches this check
# suite, then schedules a new one on the 'timeout_execution' queue to run two
# hours from now.
def create_timeout_worker
  suite_id = @check_suite.id

  # NOTE(review): the LIKE pattern matches the id as a substring of the
  # serialized handler, so an id such as 1 presumably also matches jobs for
  # 10, 21, ... - confirm against the stored handler format.
  pending_jobs = Delayed::Job.where('handler LIKE ?', "%TimeoutExecution%args%-%#{suite_id}%")
  pending_jobs&.delete_all

  logger(Logger::INFO, "CiJobStatus::Update: TimeoutExecution for '#{suite_id}'")

  run_at = 2.hours.from_now.utc
  TimeoutExecution.delay(run_at: run_at, queue: 'timeout_execution').timeout(suite_id)
end
94+
8495
def insert_new_delayed_job
8596
queue = @job.check_suite.pull_request.github_pr_id % 10
8697

@@ -91,7 +102,7 @@ def delete_and_create_delayed_job(queue)
91102
fetch_delayed_job&.destroy_all
92103

93104
CiJobStatus
94-
.delay(run_at: DELAYED_JOB_TIMER.seconds.from_now, queue: queue)
105+
.delay(run_at: DELAYED_JOB_TIMER.seconds.from_now.utc, queue: queue)
95106
.update(@job.check_suite.id, @job.id)
96107
end
97108

@@ -117,7 +128,7 @@ def failure
117128
return failures_stats if @failures.is_a? Array and !@failures.empty?
118129

119130
CiJobFetchTopotestFailures
120-
.delay(run_at: 5.minutes.from_now, queue: 'fetch_topotest_failures')
131+
.delay(run_at: 5.minutes.from_now.utc, queue: 'fetch_topotest_failures')
121132
.update(@job.id, 1)
122133
end
123134

lib/github_ci_app.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
# Workers
4242
require_relative '../workers/ci_job_status'
43+
require_relative '../workers/timeout_execution'
4344
require_relative '../workers/ci_job_fetch_topotest_failures'
4445

4546
# Slack libs

lib/models/check_suite.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,8 @@ def in_progress?
4848
# True while fewer than two of this suite's CI jobs have the :in_progress
# status.
def execution_started?
  running_jobs = ci_jobs.where(status: :in_progress)
  running_jobs.size < 2
end
51+
52+
# Most recent `updated_at` among this suite's CI jobs, converted with `to_i`.
#
# @return [Integer] the converted timestamp, or 0 when the suite has no jobs.
def last_job_updated_at_timer
  newest_job = ci_jobs.max_by(&:updated_at)
  return 0 if newest_job.nil?

  newest_job.updated_at.to_i
end
5155
end

workers/timeout_execution.rb

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# SPDX-License-Identifier: BSD-2-Clause
2+
#
3+
# timeout_execution.rb
4+
# Part of NetDEF CI System
5+
#
6+
# Copyright (c) 2024 by
7+
# Network Device Education Foundation, Inc. ("NetDEF")
8+
#
9+
# frozen_string_literal: true
10+
11+
require_relative '../config/setup'
12+
13+
# Delayed worker that finalizes a check suite whose execution appears to have
# hung (no CI job update within the last two hours).
class TimeoutExecution
  class << self
    # Finalizes the given check suite when it is still unfinished and none of
    # its jobs has been updated during the last two hours.
    #
    # @param check_suite_id [Integer] id handed to CheckSuite.find
    # @return [Boolean] true when Github::PlanExecution::Finished was invoked,
    #   false when the suite had already finished or was updated recently.
    def timeout(check_suite_id)
      @logger = GithubLogger.instance.create('timeout_execution_worker.log', Logger::INFO)
      check_suite = CheckSuite.find(check_suite_id)

      @logger.info("Timeout execution for check_suite_id: #{check_suite_id} -> finished? #{check_suite.finished?}")

      return false if check_suite.finished?

      # last_job_updated_at_timer is an Integer (epoch seconds), so the cutoff
      # must be converted too: comparing an Integer with a Time raises
      # ArgumentError.
      return false if check_suite.last_job_updated_at_timer > 2.hours.ago.utc.to_i

      @logger.info("Calling Github::PlanExecution::Finished.new(#{check_suite.bamboo_ci_ref}).finished")

      # Both keys are Strings because the payload is read with String keys
      # (payload['bamboo_ref'], payload['hanged']); a Symbol key would be ignored.
      Github::PlanExecution::Finished.new({ 'bamboo_ref' => check_suite.bamboo_ci_ref, 'hanged' => true }).finished

      true
    end
  end
end

0 commit comments

Comments
 (0)