#!/bin/bash
#
# LizardFS metadata server OCF resource agent
# /usr/lib/ocf/resource.d/lizardfs/mfsmaster
#
# Manages LizardFS metadata server in full and shadow master modes
#
# Copyright (C) 2014 EditShare LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software Foundation,
# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
#######################################################################
#
# Manages the personality of LizardFS metadata server nodes as an OCF resource.
# Starts nodes in shadow master state, with an invalid master. When it receives
# notification of which node will be promoted to master, it switches its
# master to that node. When promoted to master, it changes personality to
# full master, and when demoted it stops the daemon and starts it back up
# again in shadow master mode.
# #######################################################################
#
# TODO:
# - check LizardFS metadata server to ensure it isn't configured to start
#   at boot
# - check permissions and configuration file sanity
# - use lizardfs-admin information to set priorities for shadow masters
#   to determine which one is the best candidate to promote to master
# - Add support for running only in master mode (if, for instance, we're
#   a master writing to an underlying replicated filesystem, and want to
#   use Pacemaker to manage which node we're on), instead of requiring
#   master/slave
#
# #######################################################################

: ${OCF_ROOT:=/usr/lib/ocf}
: ${OCF_FUNCTIONS_DIR:=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs

#######################################
# Read a variable from a config file of "VAR = value" lines.
# Arguments: $1 - config file path
#            $2 - variable name
#            $3 - separator (default '=')
#            $4 - default value if the variable is not set in the file
# Outputs:   the value (last occurrence wins) or the default, on stdout
#######################################
read_cfg_var() {
	local cfg_file=${1}
	local var=${2}
	local sep=${3:-=}
	local default_value=${4}
	# Echo the default first; 'tail -n 1' keeps the last line, so any
	# value found in the file overrides the default.
	{
		echo "${default_value}"
		sed -e 's/[[:blank:]]*#.*$//' -n \
			-e 's/^[[:blank:]]*'"${var}"'[[:blank:]]*'"${sep}"'[[:blank:]]*\(.*\)$/\1/p' \
			"$cfg_file"
	} | tail -n 1
}

# Parameters for this resource agent, with default values
OCF_RESKEY_master_cfg_default=/etc/mfs/mfsmaster.cfg
: ${OCF_RESKEY_master_cfg:=$OCF_RESKEY_master_cfg_default}

# Convenience variables
lock_timeout=10 # seconds
# crm_master promotion scores, best candidate first
score_master=1000
score_shadow_latest=900
score_shadow_connected=500
score_shadow_no_metadata=-1 # negative: never promote a shadow without metadata
metadata_version_attribute_name="lizardfs-metadata-version"

# Core LizardFS variables, read from the master config file
failover_ip=$(read_cfg_var "${OCF_RESKEY_master_cfg}" MASTER_HOST)
admin_password=$(read_cfg_var "${OCF_RESKEY_master_cfg}" ADMIN_PASSWORD)
data_dir=$(read_cfg_var "${OCF_RESKEY_master_cfg}" DATA_PATH = @DATA_PATH@)
matocl_host=$(read_cfg_var "${OCF_RESKEY_master_cfg}" MATOCL_LISTEN_HOST = '*')
matocl_port=$(read_cfg_var "${OCF_RESKEY_master_cfg}" MATOCL_LISTEN_PORT = 9421)
lizardfs_user=$(read_cfg_var "${OCF_RESKEY_master_cfg}" WORKING_USER = @DEFAULT_USER@)
lizardfs_group=$(read_cfg_var "${OCF_RESKEY_master_cfg}" WORKING_GROUP = @DEFAULT_GROUP@)
exports_cfg=$(read_cfg_var "${OCF_RESKEY_master_cfg}" EXPORTS_FILENAME = /etc/mfs/mfsexports.cfg)
master_metadata=${data_dir}/metadata.mfs
master_lock=${data_dir}/metadata.mfs.lock
master_backup_logs=${data_dir}/changelog.mfs.*

# About

usage() {
	cat <<EOF
usage: $0 {start|stop|monitor|reload|promote|demote|validate-all|meta-data|usage|help}

Manages the personality of LizardFS metadata server nodes as an OCF
master/slave resource.
EOF
}

# NOTE(review): the meta-data XML in this file was garbled (its tags were
# stripped inside usage()'s heredoc) and the lizardfs_ra_metadata function
# called by the dispatch below was missing entirely. The document here is
# reconstructed from the surviving description text -- confirm against the
# upstream EditShare agent.
lizardfs_ra_metadata() {
	cat <<EOF
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="mfsmaster" version="0.1">
<version>0.1</version>
<longdesc lang="en">
Manages the personality of LizardFS metadata server nodes as an OCF resource.
Starts nodes in shadow master state, with an invalid master. When it receives
notification of which node will be promoted to master, it switches its
master to that node. When promoted to master, it changes personality to
full master, and when demoted it stops the daemon and starts it back up
again in shadow master mode.
</longdesc>
<shortdesc lang="en">Manages the shadow master state of LizardFS metadata server resources</shortdesc>
<parameters>
<parameter name="master_cfg" unique="0" required="0">
<longdesc lang="en">
Config file for LizardFS metadata server; will find in config_dir if not
specified.
</longdesc>
<shortdesc lang="en">Config file for LizardFS metadata server</shortdesc>
<content type="string" default="${OCF_RESKEY_master_cfg_default}"/>
</parameter>
</parameters>
<actions>
<action name="start" timeout="1800"/>
<action name="stop" timeout="1800"/>
<action name="monitor" interval="20" timeout="20"/>
<action name="promote" timeout="1800"/>
<action name="demote" timeout="1800"/>
<action name="notify" timeout="20"/>
<action name="reload" timeout="20"/>
<action name="validate-all" timeout="20"/>
<action name="meta-data" timeout="5"/>
</actions>
</resource-agent>
EOF
}

# Utilities

#######################################
# Run mfsmaster against the configured config file.
# Arguments: $1 - mfsmaster command (start/stop/restart/kill/isalive/reload)
#            $2 - initial personality (master/shadow); may be empty for
#                 commands that do not take one
#######################################
lizardfs_master() {
	local command=$1
	local personality=$2
	mfsmaster -c "$OCF_RESKEY_master_cfg" \
		-o ha-cluster-managed \
		-o initial-personality="${personality}" \
		"${command}"
}

# Actions

# OCF "start": bring the daemon up as a shadow master (the slave state).
lizardfs_master_start_shadow() {
	lizardfs_master_monitor
	case $? in
	$OCF_RUNNING_MASTER)
		ocf_log warn "LizardFS metadata server already running in master mode"
		return $OCF_RUNNING_MASTER
		;;
	$OCF_SUCCESS)
		ocf_log info "LizardFS metadata server already running as a shadow master"
		return $OCF_SUCCESS
		;;
	*)
		ocf_log debug "starting LizardFS metadata server as shadow master"
	esac
	# BUGFIX: the original 'if ! ensure_dirs ; then return ${?}' returned
	# the status of the negation (always 0), silently discarding the
	# error; capture and propagate the real status.
	local rc
	ensure_dirs
	rc=$?
	if [ $rc -ne 0 ] ; then
		return $rc
	fi
	# When the start action is called, we are supposed to start in the
	# slave state, which means starting a shadow master.
	ocf_run lizardfs_master start shadow
}

# Try a graceful stop first; fall back to kill.
metadataserver_really_stop() {
	if ocf_run lizardfs_master stop ; then
		return $OCF_SUCCESS
	else
		ocf_log warn "failed to stop LizardFS metadata server, killing instead"
		if ocf_run lizardfs_master kill ; then
			return $OCF_SUCCESS
		else
			return $OCF_ERR_GENERIC
		fi
	fi
}

# OCF "stop": stop the daemon whatever state it is in.
lizardfs_master_stop() {
	lizardfs_master_monitor
	local rc=$?
	case $rc in
	$OCF_RUNNING_MASTER|$OCF_SUCCESS)
		ocf_log debug "trying to gracefully shutdown LizardFS metadata server"
		metadataserver_really_stop
		;;
	$OCF_NOT_RUNNING)
		ocf_log info "tried to stop already stopped instance"
		return $OCF_SUCCESS
		;;
	$OCF_FAILED_MASTER)
		ocf_log info "tried to stop failed master"
		return $OCF_SUCCESS
		;;
	*)
		# BUGFIX: the original logged ${?} here, which is not guaranteed
		# to still hold the monitor status; use the captured value.
		ocf_log error "unknown state ${rc}, trying to stop"
		metadataserver_really_stop
		;;
	esac
}

# OCF "promote": turn a running shadow master into a full master.
lizardfs_master_promote() {
	lizardfs_master_monitor
	case $? in
	$OCF_RUNNING_MASTER)
		ocf_log info "LizardFS metadata server already running as master"
		return $OCF_SUCCESS
		;;
	$OCF_SUCCESS)
		ocf_log debug "LizardFS metadata server is shadow master, promoting to master"
		# BUGFIX: 'local var=$(cmd)' masks cmd's exit status with the
		# status of 'local' (always 0), so the $? check below could never
		# fire; declare and assign separately.
		local cluster_metadata_version
		cluster_metadata_version=$(get_metadata_version)
		if [[ ( $? != 0 ) || ( ${cluster_metadata_version} == "" ) ]] ; then
			ocf_log error "Failed to obtain metadata version from cluster."
			return $OCF_ERR_GENERIC
		fi
		if [[ "${cluster_metadata_version}" == 0 ]] ; then
			# Fresh cluster: no metadata anywhere yet, so restart as a
			# full master that creates a brand-new file system.
			ocf_log debug "running in shadow mode on fresh cluster, creating new metadata"
			if ! ( ocf_run lizardfs_master stop master \
					&& unlink "${master_lock}" \
					&& ocf_run lizardfs_master start master ) ; then
				ocf_log error "Failed to initialize new file system in cluster"
				return $OCF_FAILED_MASTER
			fi
		elif ! ocf_run lizardfs_admin_promote ; then
			ocf_log error "failed to reload master"
			return $OCF_FAILED_MASTER
		fi
		# Check that we are now successfully a master
		lizardfs_master_monitor
		ret=$?
		case $ret in
		$OCF_RUNNING_MASTER)
			ocf_log info "LizardFS metadata server promoted successfully"
			return $OCF_SUCCESS
			;;
		*)
			ocf_log err "LizardFS metadata server failed to promote"
			return $OCF_FAILED_MASTER
			;;
		esac
		;;
	*)
		ocf_log error \
			"LizardFS metadata server not running as shadow master, can't be promoted"
		return $OCF_ERR_GENERIC
		;;
	esac
}

# OCF "notify": react to cluster events. A shadow master learns which
# node will become master during the pre-promote notification.
lizardfs_master_notify() {
	local type_op
	type_op="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
	ocf_log debug "Received $type_op notification"
	lizardfs_master_monitor
	# BUGFIX: capture the monitor status immediately; in the original the
	# '[ ... ]' test clobbered $? before the else branch logged it.
	local rc=$?
	if [ $rc -eq $OCF_SUCCESS ] ; then
		# We're a shadow master node
		ocf_log debug "We're a shadow master node"
		case $type_op in
		pre-promote)
			# Start replicating from the new master
			local new_master=$OCF_RESKEY_CRM_meta_notify_promote_uname
			ocf_log debug "Changing master to $new_master"
			# TODO: call lizardfs-admin force-reconnect ${host} ${port}
			;;
		*)
			ocf_log debug "Notify type_op: $type_op"
			;;
		esac
	else
		ocf_log debug "lizardfs_master_monitor returned $rc"
	fi
	return $OCF_SUCCESS
}

# OCF "demote": restart a full master back into shadow mode.
lizardfs_master_demote() {
	lizardfs_master_monitor
	case $? in
	$OCF_RUNNING_MASTER)
		ocf_log debug "LizardFS metadata server running as master, demoting"
		;;
	$OCF_SUCCESS)
		ocf_log info "LizardFS metadata server already a shadow master"
		return $OCF_SUCCESS
		;;
	*)
		ocf_log error \
			"LizardFS metadata server not running, not a valid target for demotion"
		return $OCF_ERR_GENERIC
		;;
	esac
	if ! ocf_run lizardfs_master restart shadow ; then
		ocf_log error "Failed to start shadow master, demotion failed"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}

# Print the host to use when contacting the local matocl port; a '*'
# listen address means the server is reachable via localhost.
matocl_admin_host() {
	if [ "$matocl_host" = "*" ] ; then
		echo localhost
	else
		echo "$matocl_host"
	fi
}

# Promote the locally running shadow master via lizardfs-admin. The admin
# password is passed on stdin so it does not appear in 'ps' output.
lizardfs_admin_promote() {
	local host
	host=$(matocl_admin_host)
	echo -n "${admin_password}" | \
		lizardfs-admin promote-shadow "${host}" ${matocl_port}
}

# Probe this server and print the metadataserver-status information
# (tab-separated fields: personality, connection state, metadata version).
lizardfs_probe() {
	local host
	host=$(matocl_admin_host)
	lizardfs-admin metadataserver-status --porcelain "${host}" "$matocl_port"
}

# Set this node's promotion preference (crm_master score) for this boot.
update_master_score() {
	ocf_run ${HA_SBIN_DIR}/crm_master -l reboot -v $1
}

# Record the newest known metadata version as a cluster-wide attribute.
set_metadata_version() {
	ocf_log debug "LizardFS: setting cluster metadata version: ${1}"
	ocf_run ${HA_SBIN_DIR}/crm_attribute --lifetime=forever --type=crm_config \
		--name=${metadata_version_attribute_name} --update="${1}"
}

# Print the cluster-wide metadata version (0 if it was never set).
get_metadata_version() {
	${HA_SBIN_DIR}/crm_attribute --type=crm_config \
		--name ${metadata_version_attribute_name} --default=0 --query --quiet
}

# OCF "monitor": report whether mfsmaster runs here and in which
# personality. Also refreshes this node's crm_master score and, when
# running as full master, publishes the current metadata version.
lizardfs_master_monitor() {
	# Check if the mfsmaster process is running on this machine, and if
	# so check if it is as a master or a shadow master.
	ocf_run -info lizardfs_master isalive
	ret=$?
	if [ $ret -eq 0 ] ; then
		# mfsmaster is running, check to see if we're a shadow master or
		# full master
		probe_result=$(lizardfs_probe)
		if [ $? -ne 0 ] ; then
			ocf_log err "failed to query LizardFS master status"
			return $OCF_ERR_GENERIC
		fi
		local personality=$(echo "$probe_result" | cut -f1)
		local connection=$(echo "$probe_result" | cut -f2)
		local metadata_version=$(echo "$probe_result" | cut -f3)
		case "$personality/$connection" in
		master/running)
			ocf_log debug "running in master mode"
			update_master_score ${score_master}
			set_metadata_version "${metadata_version}"
			return $OCF_RUNNING_MASTER
			;;
		shadow/connected|shadow/disconnected)
			# BUGFIX: declare and assign separately so $? reflects
			# get_metadata_version rather than 'local'.
			local cluster_metadata_version
			cluster_metadata_version=$(get_metadata_version)
			if [[ ( $? != 0 ) || ( ${cluster_metadata_version} == "" ) ]] ; then
				ocf_log error "Failed to obtain metadata version from cluster."
				return $OCF_ERR_GENERIC
			fi
			if [ ${metadata_version} -ge ${cluster_metadata_version} ] ; then
				ocf_log debug "running in shadow mode, have latest metadata: ${cluster_metadata_version}"
				update_master_score ${score_shadow_latest}
			elif [ $metadata_version -gt 0 ] ; then
				ocf_log debug "running in shadow mode, latest metadata is not available: ${metadata_version} < ${cluster_metadata_version}"
				update_master_score ${score_shadow_connected}
			else
				ocf_log debug "running in shadow mode, no metadata"
				# Do not promote shadow with no metadata!
				update_master_score ${score_shadow_no_metadata}
			fi
			return $OCF_SUCCESS
			;;
		*)
			ocf_log err \
				"unexpected output from lizardfs-admin: $probe_result"
			return $OCF_ERR_GENERIC
			;;
		esac
	elif [ $ret -eq 1 ] ; then
		# Not running; a leftover lock file means it died uncleanly.
		if [ ! -e "$master_lock" ] ; then
			ocf_log debug "LizardFS metadata server not running (clean)."
			return $OCF_NOT_RUNNING
		else
			ocf_log warn "LizardFS metadata server has failed"
			return $OCF_FAILED_MASTER
		fi
	else
		ocf_log err "error checking if master is running"
		return $OCF_ERR_GENERIC
	fi
}

# OCF "reload": ask a running daemon to re-read its configuration.
lizardfs_master_reload() {
	# TODO - may need to check which parameters may be reloaded
	# vs. requiring a restart.
	lizardfs_master_monitor
	case $? in
	$OCF_RUNNING_MASTER|$OCF_SUCCESS)
		ocf_log debug "reloading LizardFS metadata server configuration"
		if ocf_run lizardfs_master reload ; then
			return $OCF_SUCCESS
		else
			return $OCF_ERR_GENERIC
		fi
		;;
	*)
		ocf_log error "no process running to reload"
		return $OCF_ERR_GENERIC
		;;
	esac
}

# Ensure the data directory exists with the right ownership, and seed an
# empty metadata file so a fresh master can start.
ensure_dirs() {
	if ! mkdir -p "$data_dir" ; then
		return $OCF_ERR_PERM
	fi
	if ! chmod 0755 "$data_dir" ; then
		return $OCF_ERR_PERM
	fi
	if ! chown -R $lizardfs_user:$lizardfs_group "$data_dir" ; then
		return $OCF_ERR_PERM
	fi
	if [ ! -e "$data_dir"/metadata.mfs ] ; then
		if [ ! -e "$data_dir"/metadata.mfs.back ] ; then
			# "MFSM NEW" is the header of an empty metadata file
			if ! echo "MFSM NEW" > "$data_dir"/metadata.mfs ; then
				return $OCF_ERR_PERM
			fi
		fi
	fi
}

# Validate binaries and configuration; exits the script on fatal problems.
lizardfs_master_validate() {
	# We need at least the master and admin binaries installed for this
	# node to function as a LizardFS metadata server master/slave node.
	check_binary mfsmaster
	check_binary lizardfs-admin
	if [[ "${failover_ip}" == "" ]] ; then
		ocf_log err "MASTER_HOST not set in $OCF_RESKEY_master_cfg"
		exit $OCF_ERR_CONFIGURED
	fi
	if [ "x${admin_password}" = "x" ] ; then
		ocf_log err "ADMIN_PASSWORD not set in $OCF_RESKEY_master_cfg"
		exit $OCF_ERR_CONFIGURED
	fi
	if ! [ -e "$OCF_RESKEY_master_cfg" ] ; then
		ocf_log err "mfsmaster.cfg not found at $OCF_RESKEY_master_cfg"
		exit $OCF_ERR_CONFIGURED
	fi
	if ! [ -e "$exports_cfg" ] ; then
		ocf_log err "mfsexports.cfg not found at $exports_cfg"
		exit $OCF_ERR_CONFIGURED
	fi
	# Ensure that LizardFS metadata server and mfs-metalogger are not set
	# to load at boot; if we're managing them via the resource agent,
	# they should not be loaded by init
}

if [ $# -ne 1 ] ; then
	usage
	exit $OCF_ERR_ARGS
fi

# meta-data and usage must work without a valid configuration
case "$1" in
meta-data)
	lizardfs_ra_metadata
	exit $OCF_SUCCESS
	;;
usage|help)
	usage
	exit $OCF_SUCCESS
	;;
esac

# All actions besides metadata and usage must pass validation
lizardfs_master_validate

case "$1" in
start)        lizardfs_master_start_shadow;;
stop)         lizardfs_master_stop;;
monitor)      lizardfs_master_monitor;;
reload)       lizardfs_master_reload;;
promote)      lizardfs_master_promote;;
demote)       lizardfs_master_demote;;
# notify)     lizardfs_master_notify;;
validate-all) ;; # We have already validated by now
*)            usage; exit $OCF_ERR_UNIMPLEMENTED;;
esac
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} : $rc"
exit $rc