diff --git a/heartbeat/galera b/heartbeat/galera index c8f47151ed..6165fd16a8 100755 --- a/heartbeat/galera +++ b/heartbeat/galera @@ -25,7 +25,7 @@ ## # README. -# +# # This agent only supports being configured as a multistate Master # resource. # @@ -49,7 +49,7 @@ # pcs resource create db galera enable_creation=true \ # wsrep_cluster_address="gcomm://rhel7-auto1,rhel7-auto2,rhel7-auto3" meta master-max=3 --master # -# By setting the 'enable_creation' option, the database will be automatically +# By setting the 'enable_creation' option, the database will be automatically # generated at startup. The meta attribute 'master-max=3' means that all 3 # nodes listed in the wsrep_cluster_address list will be allowed to connect # to the galera cluster and perform replication. @@ -57,7 +57,7 @@ # NOTE: If you have more nodes in the pacemaker cluster then you wish # to have in the galera cluster, make sure to use location contraints to prevent # pacemaker from attempting to place a galera instance on a node that is -# not in the 'wsrep_cluster_address" list. +# not in the 'wsrep_cluster_address" list. # ## @@ -101,7 +101,9 @@ UEND } meta_data() { - cat < @@ -249,6 +251,8 @@ Cluster check user password +${extra_parameters} + @@ -331,7 +335,7 @@ get_last_commit() if [ -z "$node" ]; then ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null - else + else ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null fi } @@ -411,7 +415,7 @@ master_exists() return 1 fi # determine if a master instance is already up and is healthy - ${HA_SBIN_DIR}/crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 + crm_mon --as-xml | grep "resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1 return $? 
} @@ -420,7 +424,7 @@ clear_master_score() local node=$(ocf_attribute_target $1) if [ -z "$node" ]; then $CRM_MASTER -D - else + else $CRM_MASTER -D -N $node fi } @@ -431,7 +435,7 @@ set_master_score() if [ -z "$node" ]; then $CRM_MASTER -v 100 - else + else $CRM_MASTER -N $node -v 100 fi } @@ -480,6 +484,23 @@ pcmk_to_galera_name() } +all_bootstrap_candidates() +{ + local pcmk_nodes="" + all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ') + + for node in $all_nodes; do + local pcmk_node=$(galera_to_pcmk_name $node) + if [ -z "$pcmk_node" ]; then + ocf_log err "Could not determine pacemaker node from galera name <${node}>." + return + fi + + pcmk_nodes="$pcmk_nodes $pcmk_node" + done + echo "$pcmk_nodes" +} + detect_first_master() { local best_commit=0 @@ -492,24 +513,11 @@ detect_first_master() local best_node local safe_to_bootstrap - all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ') - best_node_gcomm=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') - best_node=$(galera_to_pcmk_name $best_node_gcomm) - if [ -z "$best_node" ]; then - ocf_log err "Could not determine initial best node from galera name <${best_node_gcomm}>." - return - fi + all_nodes=$(all_bootstrap_candidates) + best_node=$(echo "$all_nodes" | sed 's/^.* \(.*\)$/\1/') # avoid selecting a recovered node as bootstrap if possible for node in $all_nodes; do - local pcmk_node=$(galera_to_pcmk_name $node) - if [ -z "$pcmk_node" ]; then - ocf_log err "Could not determine pacemaker node from galera name <${node}>." - return - else - node=$pcmk_node - fi - if is_no_grastate $node; then nodes_recovered="$nodes_recovered $node" else @@ -529,6 +537,8 @@ detect_first_master() # We don't need to wait for the other nodes to report state in this case missing_nodes=0 break + else + ocf_log info "Node <${node}> is not marked as safe to bootstrap, continuing to look." 
fi last_commit=$(get_last_commit $node) @@ -914,64 +924,70 @@ galera_validate() mysql_common_validate } -case "$1" in - meta-data) meta_data - exit $OCF_SUCCESS;; - usage|help) usage - exit $OCF_SUCCESS;; -esac - -galera_validate -rc=$? -LSB_STATUS_STOPPED=3 -if [ $rc -ne 0 ]; then +cmd_main() { case "$1" in - stop) exit $OCF_SUCCESS;; - monitor) exit $OCF_NOT_RUNNING;; - status) exit $LSB_STATUS_STOPPED;; - *) exit $rc;; + meta-data) meta_data + exit $OCF_SUCCESS;; + usage|help) usage + exit $OCF_SUCCESS;; esac -fi -if [ -z "${OCF_RESKEY_check_passwd}" ]; then - # This value is automatically sourced from /etc/sysconfig/checkcluster if available - OCF_RESKEY_check_passwd=${MYSQL_PASSWORD} -fi -if [ -z "${OCF_RESKEY_check_user}" ]; then - # This value is automatically sourced from /etc/sysconfig/checkcluster if available - OCF_RESKEY_check_user=${MYSQL_USERNAME} -fi -: ${OCF_RESKEY_check_user="root"} + galera_validate + rc=$? + LSB_STATUS_STOPPED=3 + if [ $rc -ne 0 ]; then + case "$1" in + stop) exit $OCF_SUCCESS;; + monitor) exit $OCF_NOT_RUNNING;; + status) exit $LSB_STATUS_STOPPED;; + *) exit $rc;; + esac + fi -MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}" -if [ -n "${OCF_RESKEY_check_passwd}" ]; then - MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" -fi + if [ -z "${OCF_RESKEY_check_passwd}" ]; then + # This value is automatically sourced from /etc/sysconfig/checkcluster if available + OCF_RESKEY_check_passwd=${MYSQL_PASSWORD} + fi + if [ -z "${OCF_RESKEY_check_user}" ]; then + # This value is automatically sourced from /etc/sysconfig/checkcluster if available + OCF_RESKEY_check_user=${MYSQL_USERNAME} + fi + : ${OCF_RESKEY_check_user="root"} -# This value is automatically sourced from /etc/sysconfig/checkcluster if available -if [ -n "${MYSQL_HOST}" ]; then - MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}" -fi + MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}" + if [ -n 
"${OCF_RESKEY_check_passwd}" ]; then + MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}" + fi -# This value is automatically sourced from /etc/sysconfig/checkcluster if available -if [ -n "${MYSQL_PORT}" ]; then - MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}" -fi + # This value is automatically sourced from /etc/sysconfig/checkcluster if available + if [ -n "${MYSQL_HOST}" ]; then + MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}" + fi + # This value is automatically sourced from /etc/sysconfig/checkcluster if available + if [ -n "${MYSQL_PORT}" ]; then + MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}" + fi + # What kind of method was invoked? + case "$1" in + start) galera_start;; + stop) galera_stop;; + status) mysql_common_status err;; + monitor) galera_monitor;; + promote) galera_promote;; + demote) galera_demote;; + validate-all) exit $OCF_SUCCESS;; + + *) usage + exit $OCF_ERR_UNIMPLEMENTED;; + esac +} -# What kind of method was invoked? -case "$1" in - start) galera_start;; - stop) galera_stop;; - status) mysql_common_status err;; - monitor) galera_monitor;; - promote) galera_promote;; - demote) galera_demote;; - validate-all) exit $OCF_SUCCESS;; +# run 'main' if we aren't "sourceonly" +if [ $1 != "sourceonly" ]; then + cmd_main $@ +fi - *) usage - exit $OCF_ERR_UNIMPLEMENTED;; -esac # vi:sw=4:ts=4:et: diff --git a/heartbeat/stretch_galera b/heartbeat/stretch_galera new file mode 100755 index 0000000000..d743878926 --- /dev/null +++ b/heartbeat/stretch_galera @@ -0,0 +1,279 @@ +#!/bin/sh +# +# Copyright (c) 2014 David Vossel +# All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of version 2 of the GNU General Public License as +# published by the Free Software Foundation. 
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#

##
# README.
#
# Extend the Galera resource agent to support joining into a galera cluster
# that is managed by multiple pacemaker clusters.
#
# The galera agent is sourced for its function definitions; the functions
# defined below then replace the same-named galera functions so that
# attribute look-ups can be forwarded over ssh to the pacemaker cluster
# that owns a node (see OCF_RESKEY_remote_node_map).
#
# NOTE: this script relies on bash features ("declare -f", "read -d" and
# passing an argument to "."), so it must be interpreted by bash.
##

#######################################################################
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
# "sourceonly" asks the galera agent to define its functions without
# dispatching an action.
. ${OCF_ROOT}/resource.d/heartbeat/galera sourceonly

# Expanded unquoted on purpose so that the options word-split.
# NOTE(review): StrictHostKeyChecking=no disables host key verification for
# the remote clusters - confirm this is acceptable for the deployment.
SSH_CMD="ssh -oConnectTimeout=5 -oStrictHostKeyChecking=no"

# copy original meta_data to galera_meta_data
eval "$(echo "galera_meta_data()"; declare -f meta_data | tail -n +2)"

# Print the agent metadata: the galera metadata with the remote_node_map
# parameter description handed to it as its first argument.
meta_data() {
    local extraparams

    # NOTE(review): the XML element lines of this here-document were rebuilt
    # following the OCF parameter layout - confirm the attribute values.
    # NOTE(review): get_remote_node() reads each ';'-separated entry as
    # <pacemaker node name>:<ssh destination>; confirm that the example in
    # the description below matches that layout.
    # read returns non-zero at end of input (no NUL delimiter); that is
    # expected and the text is still stored in extraparams.
    IFS='' read -r -d '' extraparams <<'END'
<parameter name="remote_node_map" unique="0" required="0">
<longdesc lang="en">
A mapping of pacemaker node names to remote hosts.

Allows pacemaker nodes in remote pacemaker clusters to be part of this
Galera cluster:

root@pacemakerhost/pcmk1:node.1.galera;root@pacemakerhost/pcmk2:node.2.galera

</longdesc>
<shortdesc lang="en">Pacemaker to Galera name mapping</shortdesc>
<content type="string" default=""/>
</parameter>

END

    galera_meta_data "$extraparams"
}

# Print the value of the "<instance>-initial-bootstrap" attribute of node $1.
#
# The attribute is optional and can be set externally before the resource is
# first run, to mark a subset of nodes that is sufficient for an initial
# bootstrap without waiting for the remaining nodes to become available.
# Nothing is queried (and nothing printed) for nodes listed in
# remote_node_map.
is_initial_bootstrap()
{
    local node=$(ocf_attribute_target $1)

    # There is deliberately no "-l reboot" here: the value is set before the
    # resource runs, and a reboot-lifetime query would only cover the current
    # run, so the flag would never be seen.
    local_crm_attribute $node --name "${INSTANCE_ATTR_NAME}-initial-bootstrap" --quiet 2>/dev/null
}

# Set the "<instance>-bootstrap" attribute to "true" for node $1.
# Does nothing for nodes listed in remote_node_map.
set_bootstrap_node()
{
    local node=$(ocf_attribute_target $1)

    local_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-bootstrap" -v "true"
}

# Query the "<instance>-no-grastate" attribute of node $1, asking the remote
# cluster over ssh when the node is listed in remote_node_map.
is_no_grastate()
{
    local node=$(ocf_attribute_target $1)

    remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" --quiet 2>/dev/null
}

# Print the "<instance>-last-committed" attribute of node $1; when no target
# node can be resolved, $NODENAME is queried instead.
get_last_commit()
{
    local node=$(ocf_attribute_target $1)

    if [ -z "$node" ]; then
        ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
    else
        remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" --quiet 2>/dev/null
    fi
}

# Print the "<instance>-safe-to-bootstrap" attribute of node $1; when no
# target node can be resolved, $NODENAME is queried instead.
get_safe_to_bootstrap()
{
    local node=$(ocf_attribute_target $1)

    if [ -z "$node" ]; then
        ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
    else
        remote_crm_attribute $node -l reboot --name "${INSTANCE_ATTR_NAME}-safe-to-bootstrap" --quiet 2>/dev/null
    fi
}

# Print the space-separated pacemaker node names to consider for bootstrap.
#
# Every host in wsrep_cluster_address is translated to its pacemaker name.
# If at least one of them carries the initial-bootstrap flag, only the
# flagged nodes are printed; otherwise all of them are.  Prints nothing when
# a galera name cannot be translated.
all_bootstrap_candidates()
{
    local initial_bootstrap_nodes=""
    local is_initial_boot
    local all_nodes_pcmk=""
    local all_nodes
    local gcomm_node
    local pcmk_node

    all_nodes=$(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' ')

    ocf_log info "searching for initial bootstrap nodes in FQDN list: $all_nodes"
    for gcomm_node in $all_nodes; do
        pcmk_node=$(galera_to_pcmk_name $gcomm_node)
        if [ -z "$pcmk_node" ]; then
            ocf_log err "Could not determine pacemaker node from galera name <${gcomm_node}>."
            return
        fi

        all_nodes_pcmk="$all_nodes_pcmk $pcmk_node"

        is_initial_boot=$(is_initial_bootstrap $pcmk_node)
        ocf_log info "for node $pcmk_node, got initial boot flag value: $is_initial_boot"

        if [ -n "$is_initial_boot" ]; then
            initial_bootstrap_nodes="$initial_bootstrap_nodes $pcmk_node"
        fi
    done

    if [ -n "$initial_bootstrap_nodes" ]; then
        ocf_log info "found initial bootstrap nodes, returning FQDN list $initial_bootstrap_nodes"
        echo "$initial_bootstrap_nodes"
    else
        ocf_log info "past looking at initial bootstrap nodes"
        # return normal list of all nodes
        echo "$all_nodes_pcmk"
    fi
}

# Return 0 when a healthy Master instance of this resource is reported by
# crm_mon, either in the local cluster or in any remote cluster named in
# remote_node_map; non-zero otherwise.  Always returns 1 during "demote".
master_exists()
{
    local master_pattern
    local master_exists_local
    local remote_ssh

    if [ "$__OCF_ACTION" = "demote" ]; then
        # We don't want to detect master instances during demote.
        # 1. we could be detecting ourselves as being master, which is no longer the case.
        # 2. we could be detecting other master instances that are in the process of shutting down.
        # by not detecting other master instances in "demote" we are deferring this check
        # to the next recurring monitor operation which will be much more accurate
        return 1
    fi

    master_pattern="resource.*id=\"${INSTANCE_ATTR_NAME}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\""

    # determine if a master instance is already up and is healthy
    crm_mon --as-xml | grep "$master_pattern" > /dev/null 2>&1
    master_exists_local=$?

    if [ $master_exists_local -eq 0 ]; then
        ocf_log info "Detected that a master exists for the local cluster"
    fi

    # if not, and we have remote nodes, check those also
    if [ $master_exists_local -ne 0 ] && [ -n "$OCF_RESKEY_remote_node_map" ]; then
        # one query per distinct ssh destination in the map
        for remote_ssh in $(echo "$OCF_RESKEY_remote_node_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' '{print $2;}' | sort -u); do
            if $SSH_CMD $remote_ssh crm_mon --as-xml | grep "$master_pattern" > /dev/null 2>&1; then
                ocf_log info "Detected that a master exists for the remote cluster $remote_ssh"
                # Report success explicitly: "$?" at this point would be the
                # exit status of ocf_log, not of the detection.
                return 0
            fi
        done
    fi

    return $master_exists_local
}

# Remove the master score of node $1, or of the local node when no target
# node can be resolved.  Does nothing for nodes listed in remote_node_map.
clear_master_score()
{
    local node=$(ocf_attribute_target $1)

    if [ -z "$node" ]; then
        $CRM_MASTER -D
    else
        local_crm_master $node -D
    fi
}

# Set the master score of node $1 to 100, or that of the local node when no
# target node can be resolved.  Does nothing for nodes listed in
# remote_node_map.
set_master_score()
{
    local node=$(ocf_attribute_target $1)

    if [ -z "$node" ]; then
        $CRM_MASTER -v 100
    else
        local_crm_master $node -v 100
    fi
}

# Print the ssh destination that remote_node_map associates with pacemaker
# node $1.  Prints nothing when the map is empty or has no entry for the node.
#
# Map entries are separated by ';'; within an entry the text before the
# first ':' is the node name and the text after it the ssh destination.
# Spaces are ignored.
get_remote_node()
{
    local node=$1
    local retval

    if [ -z "$OCF_RESKEY_remote_node_map" ]; then
        return 0
    fi

    # The node name is handed to awk as a variable instead of being spliced
    # into the program text.  Appending "" forces a string comparison even
    # for names that look like numbers.
    retval=$(echo "$OCF_RESKEY_remote_node_map" | tr ';' '\n' | tr -d ' ' | sed 's/:/ /' | awk -F' ' -v node="$node" '($1 "") == (node "") { print $2; exit }')

    if [ -n "$retval" ]; then
        echo "$retval"
    fi
}

# Run $CRM_MASTER for node $1 with the remaining arguments, but only when
# the node is not listed in remote_node_map.
local_crm_master()
{
    local node=$1
    shift

    local remote_ssh
    remote_ssh=$(get_remote_node "$node")

    if [ -z "$remote_ssh" ]; then
        # $CRM_MASTER is expanded unquoted so that it word-splits.
        $CRM_MASTER -N "$node" "$@"
    fi

    # if this is a remote node, don't set master; this will be set up
    # from that pacemaker cluster.
}

# Run crm_attribute for node $1 with the remaining arguments, but only when
# the node is not listed in remote_node_map.
local_crm_attribute()
{
    local node=$1
    shift

    local remote_ssh
    remote_ssh=$(get_remote_node "$node")

    if [ -z "$remote_ssh" ]; then
        ${HA_SBIN_DIR}/crm_attribute -N "$node" "$@"
    fi

    # if this is a remote node, don't run any command
}

# Run crm_attribute for node $1 with the remaining arguments: directly when
# the node is not listed in remote_node_map, otherwise over ssh on the
# destination mapped to it.
remote_crm_attribute()
{
    local node=$1
    shift

    local remote_ssh
    remote_ssh=$(get_remote_node "$node")

    if [ -z "$remote_ssh" ]; then
        ${HA_SBIN_DIR}/crm_attribute -N "$node" "$@"
    else
        $SSH_CMD "$remote_ssh" ${HA_SBIN_DIR}/crm_attribute -N "$node" "$@"
    fi
}

# Defined here as a no-op: it only prints an empty line.
# NOTE(review): presumably replaces the galera agent's implementation so
# that this agent does not promote other nodes - confirm.
promote_everyone()
{
    # turn into a no-op
    echo;
}

cmd_main "$@"


# vi:sw=4:ts=4:et: