Skip to content

Commit

Permalink
Merge pull request #955 from MaximilianMeister/nodes-upgrade-error
Browse files Browse the repository at this point in the history
upgrade: node upgrade cleanup
  • Loading branch information
jsuchome authored Jan 9, 2017
2 parents cdd9474 + cba4625 commit 7badddd
Showing 1 changed file with 154 additions and 128 deletions.
282 changes: 154 additions & 128 deletions crowbar_framework/app/models/api/upgrade.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ def status
::Crowbar::UpgradeStatus.new.progress
end

#
# prechecks
#
def checks
upgrade_status = ::Crowbar::UpgradeStatus.new
# the check for current_step means to allow running the step at any point in time
Expand Down Expand Up @@ -99,28 +102,24 @@ def best_method
end
end

def noderepocheck
upgrade_status = ::Crowbar::UpgradeStatus.new
upgrade_status.start_step(:nodes_repo_checks)
#
# prepare upgrade
#
def prepare(options = {})
::Crowbar::UpgradeStatus.new.start_step(:upgrade_prepare)

response = {}
addons = Api::Crowbar.addons
addons.push("os", "openstack").each do |addon|
response.merge!(Api::Node.repocheck(addon: addon))
end
background = options.fetch(:background, false)

unavailable_repos = response.select { |_k, v| !v["available"] }
if unavailable_repos.any?
upgrade_status.end_step(
false,
nodes_repo_checks: "#{unavailable_repos.keys.join(", ")} repositories are missing"
)
if background
prepare_nodes_for_crowbar_upgrade_background
else
upgrade_status.end_step
prepare_nodes_for_crowbar_upgrade
end
response
end

#
# repocheck
#
def adminrepocheck
upgrade_status = ::Crowbar::UpgradeStatus.new
upgrade_status.start_step(:admin_repo_checks)
Expand Down Expand Up @@ -184,6 +183,28 @@ def adminrepocheck
end
end

# Check that the repositories needed for the upgrade are available to the nodes.
#
# Runs as the :nodes_repo_checks upgrade step: asks Api::Node.repocheck about
# every addon reported by Api::Crowbar.addons plus "os" and "openstack", merges
# the answers into one hash and ends the step as failed when any entry lacks a
# truthy "available" value.
#
# @return [Hash] the merged results of all Api::Node.repocheck calls
def noderepocheck
  upgrade_status = ::Crowbar::UpgradeStatus.new
  upgrade_status.start_step(:nodes_repo_checks)

  # Build a new list instead of pushing onto the array handed out by
  # Api::Crowbar.addons, so that array is never modified from here.
  components = Api::Crowbar.addons + ["os", "openstack"]
  response = components.each_with_object({}) do |addon, merged|
    merged.merge!(Api::Node.repocheck(addon: addon))
  end

  missing = response.reject { |_name, details| details["available"] }
  if missing.empty?
    upgrade_status.end_step
  else
    upgrade_status.end_step(
      false,
      nodes_repo_checks: "#{missing.keys.join(", ")} repositories are missing"
    )
  end
  response
end

def target_platform(options = {})
platform_exception = options.fetch(:platform_exception, nil)

Expand All @@ -197,7 +218,9 @@ def target_platform(options = {})
end
end

# Shutdown non-essential services on all nodes.
#
# service shutdown
#
def services
begin
# prepare the scripts for various actions necessary for the upgrade
Expand Down Expand Up @@ -246,6 +269,9 @@ def services
end
handle_asynchronously :services

#
# cancel upgrade
#
def cancel
upgrade_status = ::Crowbar::UpgradeStatus.new
unless upgrade_status.cancel_allowed?
Expand All @@ -260,7 +286,9 @@ def cancel
upgrade_status.initialize_state
end

# Orchestrate the upgrade of the nodes
#
# nodes upgrade
#
def nodes
status = ::Crowbar::UpgradeStatus.new

Expand Down Expand Up @@ -297,20 +325,35 @@ def nodes
end
handle_asynchronously :nodes

def prepare(options = {})
::Crowbar::UpgradeStatus.new.start_step(:upgrade_prepare)
protected

background = options.fetch(:background, false)
#
# controller nodes upgrade
#
def upgrade_controller_nodes
drbd_nodes = Node.find("drbd_rsc:*")
return upgrade_drbd_clusters unless drbd_nodes.empty?

if background
prepare_nodes_for_crowbar_upgrade_background
else
prepare_nodes_for_crowbar_upgrade
founder = Node.find(
"state:crowbar_upgrade AND pacemaker_founder:true"
).first
cluster_env = founder[:pacemaker][:config][:environment]

non_founder = Node.find(
"state:crowbar_upgrade AND pacemaker_founder:false AND " \
"pacemaker_config_environment:#{cluster_env}"
).first

upgrade_first_cluster_node founder, non_founder

# upgrade the rest of nodes in the same cluster
Node.find(
"state:crowbar_upgrade AND pacemaker_config_environment:#{cluster_env}"
).each do |node|
upgrade_next_cluster_node node.name, founder.name
end
end

protected

# Method for upgrading first node of the cluster
# other_node argument is any other node in the same cluster
def upgrade_first_cluster_node(node, other_node)
Expand Down Expand Up @@ -357,30 +400,6 @@ def upgrade_next_cluster_node(node, founder)
node_api.save_node_state("controller", "upgraded")
end

# Upgrade the controller nodes of a pacemaker cluster.
#
# When any node matches "drbd_rsc:*" the work is handed over to
# upgrade_drbd_clusters. Otherwise the founder is upgraded first (together with
# one non-founder of the same cluster environment) and then every node of that
# environment still found in the crowbar_upgrade state is upgraded in turn.
def upgrade_controller_nodes
  return upgrade_drbd_clusters unless ::Node.find("drbd_rsc:*").empty?

  founder_query = "state:crowbar_upgrade AND pacemaker_founder:true"
  founder = ::Node.find(founder_query).first
  cluster_env = founder[:pacemaker][:config][:environment]

  non_founder_query =
    "state:crowbar_upgrade AND pacemaker_founder:false AND " \
    "pacemaker_config_environment:#{cluster_env}"
  non_founder = ::Node.find(non_founder_query).first

  upgrade_first_cluster_node(founder, non_founder)

  # upgrade the rest of nodes in the same cluster
  remaining = ::Node.find(
    "state:crowbar_upgrade AND pacemaker_config_environment:#{cluster_env}"
  )
  remaining.each { |node| upgrade_next_cluster_node(node.name, founder.name) }
end

def upgrade_drbd_clusters
::Node.find(
"state:crowbar_upgrade AND pacemaker_founder:true"
Expand Down Expand Up @@ -472,6 +491,71 @@ def evacuate_network_node(controller, network_node, delete_namespaces = false)
)
end

#
# compute nodes upgrade
#
# Upgrade the compute nodes of each virtualization type ("kvm", then "xen").
#
# Stops at the first type for which upgrade_compute_nodes returns a falsy value.
#
# @return [Boolean] true when every type was upgraded, false otherwise
def upgrade_all_compute_nodes
  ["kvm", "xen"].all? { |virt| upgrade_compute_nodes(virt) }
end

# Upgrade every compute node carrying the nova-compute-<virt> role.
#
# The repository and pre-upgrade scripts are started on all matching nodes
# together; afterwards each node is handled on its own: live-evacuated,
# OS-upgraded, rebooted, post-upgraded, re-joined, and finally its nova-compute
# service is re-enabled from the nova controller.
#
# @param virt [String] role suffix used in the "roles:nova-compute-<virt>" search
# @return [true] when no matching node exists or all nodes were processed
# @raise through raise_upgrade_error when no nova controller is found, a
#   preparation script fails, or re-enabling nova-compute fails
def upgrade_compute_nodes(virt)
  save_upgrade_state("Upgrading compute nodes of #{virt} type")
  # Explicit top-level lookup, matching the other ::Node calls in this file, so
  # the constant cannot be resolved to Api::Node.
  compute_nodes = ::Node.find("roles:nova-compute-#{virt}")
  return true if compute_nodes.empty?

  controller = ::Node.find("roles:nova-controller").first
  if controller.nil?
    raise_upgrade_error("No nova controller node was found!")
    # NOTE(review): raise_upgrade_error appears to always raise, which would
    # make this return unreachable - confirm before removing it.
    return false
  end

  # First batch of actions can be executed in parallel for all compute nodes
  begin
    execute_scripts_and_wait_for_finish(
      compute_nodes,
      "/usr/sbin/crowbar-prepare-repositories.sh",
      120
    )
    save_upgrade_state("Repositories prepared successfully.")
    execute_scripts_and_wait_for_finish(
      compute_nodes,
      "/usr/sbin/crowbar-pre-upgrade.sh",
      300
    )
    save_upgrade_state("Services at compute nodes upgraded and prepared.")
  rescue StandardError => e
    raise_upgrade_error(
      "Error while preparing services at compute nodes. #{e.message}"
    )
  end

  # Next part must be done sequentially, only one compute node can be upgraded at a time
  compute_nodes.each do |n|
    node_api = Api::Node.new(n.name)
    live_evacuate_compute_node(controller, n.name)
    node_api.os_upgrade
    node_api.reboot_and_wait
    node_api.post_upgrade
    node_api.join_and_chef

    out = controller.run_ssh_cmd(
      "source /root/.openrc; nova service-enable #{n.name} nova-compute"
    )
    unless out[:exit_code].zero?
      # The two literals are concatenated, so the first one has to end with a
      # space to keep the sentences apart.
      raise_upgrade_error(
        "Enabling nova-compute service for #{n.name} has failed! " \
        "Check nova log files at #{controller.name} and #{n.name}."
      )
    end
    save_upgrade_state("Node #{n.name} successfully upgraded.")
  end
  # FIXME: finalize compute nodes (move upgrade_step to done etc.)
  true
end

# Live migrate all instances of the specified
# node to other available hosts.
def live_evacuate_compute_node(controller, compute)
Expand All @@ -498,13 +582,6 @@ def raise_upgrade_error(message = "")
raise message
end

# Run upgrade_compute_nodes for the "kvm" and then the "xen" compute nodes.
#
# @return [Boolean] false as soon as one type reports a falsy result (later
#   types are then not attempted), true when both succeed
def upgrade_all_compute_nodes
  virt_types = ["kvm", "xen"]
  virt_types.all? do |virt|
    upgrade_compute_nodes(virt)
  end
end

# Take a list of nodes and execute given script at each node in the background
# Wait until all scripts at all nodes correctly finish or until some error is detected
def execute_scripts_and_wait_for_finish(nodes, script, seconds)
Expand Down Expand Up @@ -543,66 +620,9 @@ def execute_scripts_and_wait_for_finish(nodes, script, seconds)
end
end

# Upgrade the compute nodes that have the nova-compute-<virt> role.
#
# Preparation scripts are run on all matching nodes first; then the nodes are
# processed one after another (live evacuation, OS upgrade, reboot,
# post-upgrade, join and chef run) and the nova-compute service of each node
# is enabled again through the nova controller.
#
# @param virt [String] role suffix used in the "roles:nova-compute-<virt>" search
# @return [true] when no matching node exists or all nodes were processed
# @raise through raise_upgrade_error when no nova controller is found, a
#   preparation script fails, or enabling nova-compute fails
def upgrade_compute_nodes(virt)
  save_upgrade_state("Upgrading compute nodes of #{virt} type")
  compute_nodes = ::Node.find("roles:nova-compute-#{virt}")
  return true if compute_nodes.empty?

  controller = ::Node.find("roles:nova-controller").first
  if controller.nil?
    raise_upgrade_error("No nova controller node was found!")
    # NOTE(review): raise_upgrade_error appears to always raise, which would
    # make this return unreachable - confirm before removing it.
    return false
  end

  # First batch of actions can be executed in parallel for all compute nodes
  begin
    execute_scripts_and_wait_for_finish(
      compute_nodes,
      "/usr/sbin/crowbar-prepare-repositories.sh",
      120
    )
    save_upgrade_state("Repositories prepared successfully.")
    execute_scripts_and_wait_for_finish(
      compute_nodes,
      "/usr/sbin/crowbar-pre-upgrade.sh",
      300
    )
    save_upgrade_state("Services at compute nodes upgraded and prepared.")
  rescue StandardError => e
    raise_upgrade_error(
      "Error while preparing services at compute nodes. " + e.message
    )
  end

  # Next part must be done sequentially, only one compute node can be upgraded at a time
  compute_nodes.each do |n|
    node_api = Api::Node.new n.name
    live_evacuate_compute_node(controller, n.name)
    node_api.os_upgrade
    node_api.reboot_and_wait
    node_api.post_upgrade
    node_api.join_and_chef

    out = controller.run_ssh_cmd(
      "source /root/.openrc; nova service-enable #{n.name} nova-compute"
    )
    next save_upgrade_state("Node #{n.name} successfully upgraded.") if out[:exit_code].zero?

    # Trailing space added: the adjacent literals are joined into one message
    # and previously ran together as "failed!Check".
    raise_upgrade_error(
      "Enabling nova-compute service for #{n.name} has failed! " \
      "Check nova log files at #{controller.name} and #{n.name}."
    )
  end
  # FIXME: finalize compute nodes (move upgrade_step to done etc.)
  true
end

# Thin wrapper: calls Api::Crowbar.upgrade and returns whatever it returns.
def crowbar_upgrade_status
Api::Crowbar.upgrade
end

# Check Errors
#
# prechecks helpers
#
# all of the below errors return a hash with the following schema:
# code: {
# data: ... whatever data type ...,
Expand Down Expand Up @@ -674,16 +694,9 @@ def compute_resources_check_errors(check)
}
end

# Tell whether the product list contains the given product in the given version.
#
# @param products [Enumerable] entries that are read through their "version"
#   and "name" keys
# @param product [Object] value compared against each entry's "name"
# @param version [Object] value compared against each entry's "version"
# @return [Boolean] true when at least one entry matches both values
def repo_version_available?(products, product, version)
  products.any? { |entry| entry["version"] == version && entry["name"] == product }
end

# Returns the architecture attribute of the node given by ::Node.admin_node.
def admin_architecture
::Node.admin_node.architecture
end

#
# prepare upgrade helpers
#
def prepare_nodes_for_crowbar_upgrade_background
@thread = Thread.new do
Rails.logger.debug("Started prepare in a background thread")
Expand All @@ -709,6 +722,19 @@ def prepare_nodes_for_crowbar_upgrade

false
end

#
# repocheck helpers
#
# Check whether any entry of products has both the requested version and name.
#
# @param products [Enumerable] entries that are read through their "version"
#   and "name" keys
# @param product [Object] expected value of an entry's "name"
# @param version [Object] expected value of an entry's "version"
# @return [Boolean] true if some entry matches on both keys
def repo_version_available?(products, product, version)
  products.any? do |candidate|
    next false unless candidate["version"] == version

    candidate["name"] == product
  end
end

# Looks up the admin node via ::Node.admin_node and returns its architecture.
def admin_architecture
::Node.admin_node.architecture
end
end
end
end

0 comments on commit 7badddd

Please sign in to comment.