Skip to content

Commit 69652a1

Browse files
committed
Wait for all nodes to be added before running ansible
OpenShift compute nodes are members of an AutoscalingGroup in the heat stack. When each member is created, it registers itself on the bastion node (the "deployment_bastion_node_add" SW deployment in templates); then a separate step ("deployment_run_ansible" in node.yaml) configures OpenShift on all nodes which are already registered. It may happen that deployment_run_ansible is triggered by some node before all nodes are registered (by deployment_bastion_node_add), which causes the nodes to be added in multiple ansible runs/batches. This doesn't break the setup and is not harmful in itself, but it makes creation or scale-up slower. It would be better to wait until deployment_bastion_node_add is done for all nodes and then run deployment_run_ansible only once for all nodes (AutoscalingGroup doesn't make this easy, though). This patch uses the node_count param to get the desired number of nodes; this cannot work with autoscaling, so it is used only for the non-autoscaling create/update scenario.
1 parent f863d7f commit 69652a1

4 files changed

Lines changed: 58 additions & 2 deletions

File tree

bastion.yaml

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ description: >
77
88
parameters:
99

10+
node_count:
11+
type: number
12+
description: >
13+
Number of non-master nodes to create.
14+
1015
# What version of OpenShift Container Platform to install
1116
# This value is used to select the RPM repo for the OCP release to install
1217
ocp_version:
@@ -376,6 +381,29 @@ resources:
376381
- get_file: templates/var/lib/ansible/roles/fstab_mount_options/tasks/main.yml
377382
- get_file: templates/var/lib/ansible/roles/xfs_grub_quota/tasks/main.yml
378383

384+
update_node_count:
385+
type: OS::Heat::SoftwareConfig
386+
properties:
387+
group: script
388+
inputs:
389+
- name: node_count
390+
config: |
391+
#!/bin/bash
392+
set -eux
393+
mkdir -p /var/lib/ansible
394+
echo "$node_count" > /var/lib/ansible/node_count
395+
396+
deployment_update_node_count:
397+
depends_on: wait_condition
398+
type: OS::Heat::SoftwareDeployment
399+
properties:
400+
config:
401+
get_resource: update_node_count
402+
server:
403+
get_resource: host
404+
input_values:
405+
node_count: {get_param: node_count}
406+
379407
deployment_write_templates:
380408
depends_on: wait_condition
381409
type: OS::Heat::SoftwareDeployment

fragments/bastion-ansible.sh

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,13 +134,27 @@ function is_scaleup() {
134134
grep -v '.*-node') && return 1 || return 0
135135
}
136136

137+
function backup_ansdir() {
138+
[ -e ${ANSDIR}.deployed ] && rm -rf ${ANSDIR}.deployed
139+
mv ${ANSDIR}.started ${ANSDIR}.deployed
140+
}
141+
137142
[ "$prepare_ansible" == "False" ] && exit 0
138143

139144
mkdir -p /var/lib/ansible/group_vars
140145
mkdir -p /var/lib/ansible/host_vars
141146

142147
touch $NODESFILE
143148

149+
existing=$(wc -l < $NODESFILE)
150+
if [ -e /var/lib/ansible/node_count ]; then
151+
node_count=$(cat /var/lib/ansible/node_count)
152+
if [ $existing -lt $node_count -a "$autoscaling" != "True" ]; then
153+
echo "skipping ansible run - only $existing of $node_count is registered"
154+
exit 0
155+
fi
156+
fi
157+
144158
create_metadata_json /var/lib/ansible/metadata.json
145159

146160
# generate ansible files from templates (located
@@ -184,6 +198,11 @@ export ANSIBLE_HOST_KEY_CHECKING=False
184198

185199
logfile=/var/log/ansible.$$
186200
if is_scaleup; then
201+
if [ -z $(get_new_nodes) ]; then
202+
echo "There are no new nodes, not running scalup playbook"
203+
backup_ansdir
204+
exit 0
205+
fi
187206
cmd="ansible-playbook -vvvv --inventory /var/lib/ansible/inventory \
188207
/var/lib/ansible/playbooks/scaleup.yml"
189208
else
@@ -197,8 +216,7 @@ if [ "$execute_ansible" == True ] ; then
197216
echo "Failed to run '$cmd', full log is in $(hostname):$logfile" >&2
198217
exit 1
199218
else
200-
[ -e ${ANSDIR}.deployed ] && rm -rf ${ANSDIR}.deployed
201-
mv ${ANSDIR}.started ${ANSDIR}.deployed
219+
backup_ansdir
202220
fi
203221
else
204222
echo "INFO: ansible execution disabled"

node.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,12 @@ parameters:
346346
type: string
347347
description: Extra parameters for openshift-ansible
348348

349+
autoscaling:
350+
type: boolean
351+
description: >
352+
Automatically scale up/down openshift nodes.
353+
default: false
354+
349355
resources:
350356

351357
# Generate a string to distinguish one node from the others
@@ -704,6 +710,8 @@ resources:
704710
default: {get_param: volume_quota}
705711
- name: extra_openshift_ansible_params
706712
default: {get_param: extra_openshift_ansible_params}
713+
- name: autoscaling
714+
default: {get_param: autoscaling}
707715
outputs:
708716
- name: ca_cert
709717
- name: ca_key

openshift.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ resources:
549549
depends_on: [external_router_interface, fixed_network, fixed_subnet, registry_volume]
550550
type: bastion.yaml
551551
properties:
552+
node_count: {get_param: node_count}
552553
ocp_version: {get_param: ocp_version}
553554
osp_version: {get_param: osp_version}
554555
ansible_version: {get_param: ansible_version}
@@ -680,6 +681,7 @@ resources:
680681
resource:
681682
type: node.yaml
682683
properties:
684+
autoscaling: {get_param: autoscaling}
683685
ocp_version: {get_param: ocp_version}
684686
image: {get_param: node_image}
685687
flavor: {get_param: node_flavor}

0 commit comments

Comments
 (0)