#!/bin/sh
#
# 
# oracle
#
# Description:	Manages an Oracle Database as a High-Availability
#		resource
#
#
# Author:	Dejan Muhamedagic
# Support:	users@clusterlabs.org
# License:	GNU General Public License (GPL)
# Copyright:	(C) 2006 International Business Machines, Inc.
#
#		This code inspired by the DB2 resource script
#		written by Alan Robertson
#
# An example usage in /etc/ha.d/haresources: 
#       node1  10.0.0.170 oracle::RK1::/oracle/10.2::orark1
#
# See oracle_usage() function below for more details...
#
# OCF instance parameters:
#	OCF_RESKEY_sid
#	OCF_RESKEY_home (optional; else read it from /etc/oratab)
#	OCF_RESKEY_user (optional; figure it out by checking file ownership)
#	OCF_RESKEY_ipcrm (optional; defaults to "instance")
#	OCF_RESKEY_clear_backupmode (optional; default to "false")
#	OCF_RESKEY_shutdown_method (optional; default to "checkpoint/abort")
#	OCF_RESKEY_monuser (optional; defaults to "OCFMON")
#	OCF_RESKEY_monpassword (optional; defaults to "OCFMON")
#	OCF_RESKEY_monprofile (optional; defaults to "OCFMONPROFILE")
#
# Initialization:

: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
. ${OCF_FUNCTIONS_DIR}/ora-common.sh

#######################################################################

# Print the usage text: the supported methods joined with '|' followed by
# a one-line description of the main operations.
# Sets the global variable "methods" as a side effect.
oracle_usage() {
  # word-splitting of the method list is intentional: it turns the
  # newline-separated list into a space-separated one before joining
  methods=$(echo $(oracle_methods) | tr ' ' '|')
  cat <<-EOT
	usage: $0 {$methods}

	$0 manages an Oracle Database instance as an HA resource.

	The 'start' operation starts the database.
	The 'stop' operation stops the database.
	The 'status' operation reports whether the database is running
	The 'monitor' operation reports whether the database seems to be working
	The 'dumpinstipc' operation prints IPC resources used by the instance
	The 'cleanup' operation tries to clean up after Oracle was brutally stopped
	The 'validate-all' operation reports whether the parameters are valid
	The 'methods' operation reports on the methods $0 supports

	EOT
}

# Default values for the instance parameters. They are advertised in the
# meta-data and used as fallbacks when the matching OCF_RESKEY_* variable
# is unset or empty.

# no defaults: sid is required, home and user are left empty here
OCF_RESKEY_sid_default=''
OCF_RESKEY_home_default=''
OCF_RESKEY_user_default=''

# monitoring account
OCF_RESKEY_monuser_default='OCFMON'
OCF_RESKEY_monpassword_default='OCFMON'
OCF_RESKEY_monprofile_default='OCFMONPROFILE'

# start/stop behaviour
OCF_RESKEY_ipcrm_default='instance'
OCF_RESKEY_clear_backupmode_default='false'
OCF_RESKEY_shutdown_method_default='checkpoint/abort'

# Print the resource agent meta-data (XML) on stdout.
#
# The here-document delimiter is unquoted, so the OCF_RESKEY_*_default
# variables are expanded into the default="..." attributes, and "\$" is
# emitted as a literal "$". Everything between "cat <<END" and "END" is
# output verbatim otherwise; do not put shell comments inside it.
oracle_meta_data() {
	cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="oracle" version="1.0">
<version>1.0</version>

<longdesc lang="en">
Resource script for oracle. Manages an Oracle Database instance
as an HA resource.
</longdesc>
<shortdesc lang="en">Manages an Oracle Database instance</shortdesc>

<parameters>

<parameter name="sid" unique="1" required="1">
<longdesc lang="en">
The Oracle SID (aka ORACLE_SID).
</longdesc>
<shortdesc lang="en">sid</shortdesc>
<content type="string" default="${OCF_RESKEY_sid_default}" />
</parameter>

<parameter name="home" unique="0">
<longdesc lang="en">
The Oracle home directory (aka ORACLE_HOME).
If not specified, then the SID along with its home should be listed in
/etc/oratab.
</longdesc>
<shortdesc lang="en">home</shortdesc>
<content type="string" default="${OCF_RESKEY_home_default}" />
</parameter>

<parameter name="user" unique="0">
<longdesc lang="en">
The Oracle owner (aka ORACLE_OWNER).
If not specified, then it is set to the owner of
file \$ORACLE_HOME/dbs/*\${ORACLE_SID}.ora.
If this does not work for you, just set it explicitely.
</longdesc>
<shortdesc lang="en">user</shortdesc>
<content type="string" default="${OCF_RESKEY_user_default}" />
</parameter>

<parameter name="monuser" unique="0">
<longdesc lang="en">
Monitoring user name. Every connection as
sysdba is logged in an audit log. This can
result in a large number of new files created.
A new user is created (if it doesn't exist) in
the start action and subsequently used in monitor.
It should have very limited rights. Make sure
that the password for this user does not expire.
</longdesc>
<shortdesc lang="en">monuser</shortdesc>
<content type="string" default="$OCF_RESKEY_monuser_default" />
</parameter>

<parameter name="monpassword" unique="0">
<longdesc lang="en">
Password for the monitoring user. Make sure
that the password for this user does not expire.
Need to explicitly set a password to a new monitor
user for the security reason.
</longdesc>
<shortdesc lang="en">monpassword</shortdesc>
<content type="string" default="$OCF_RESKEY_monpassword_default" />
</parameter>

<parameter name="monprofile" unique="0">
<longdesc lang="en">
Profile used by the monitoring user. If the
profile does not exist, it will be created
with a non-expiring password.
</longdesc>
<shortdesc lang="en">monprofile</shortdesc>
<content type="string" default="$OCF_RESKEY_monprofile_default" />
</parameter>

<parameter name="ipcrm" unique="0">
<longdesc lang="en">
Sometimes IPC objects (shared memory segments and semaphores)
belonging to an Oracle instance might be left behind which
prevents the instance from starting. It is not easy to figure out
which shared segments belong to which instance, in particular when
more instances are running as same user.

What we use here is the "oradebug" feature and its "ipc" trace
utility. It is not optimal to parse the debugging information, but
I am not aware of any other way to find out about the IPC
information. In case the format or wording of the trace report
changes, parsing might fail. There are some precautions, however,
to prevent stepping on other peoples toes. There is also a
dumpinstipc option which will make us print the IPC objects which
belong to the instance. Use it to see if we parse the trace file
correctly.

Three settings are possible:

- none: don't mess with IPC and hope for the best (beware: you'll
  probably be out of luck, sooner or later)
- instance: try to figure out the IPC stuff which belongs to the
  instance and remove only those (default; should be safe)
- orauser: remove all IPC belonging to the user which runs the
  instance (don't use this if you run more than one instance as same
  user or if other apps running as this user use IPC)

The default setting "instance" should be safe to use, but in that
case we cannot guarantee that the instance will start. In case IPC
objects were already left around, because, for instance, someone
mercilessly killing Oracle processes, there is no way any more to
find out which IPC objects should be removed. In that case, human
intervention is necessary, and probably _all_ instances running as
same user will have to be stopped. The third setting, "orauser",
guarantees IPC objects removal, but it does that based only on IPC
objects ownership, so you should use that only if every instance
runs as separate user.

Please report any problems. Suggestions/fixes welcome.
</longdesc>
<shortdesc lang="en">ipcrm</shortdesc>
<content type="string" default="${OCF_RESKEY_ipcrm_default}" />
</parameter>

<parameter name="clear_backupmode" unique="0" required="0">
<longdesc lang="en">
The clear of the backup mode of ORACLE.
</longdesc>
<shortdesc lang="en">clear_backupmode</shortdesc>
<content type="boolean" default="${OCF_RESKEY_clear_backupmode_default}" />
</parameter>

<parameter name="shutdown_method" unique="0" required="0">
<longdesc lang="en">
How to stop Oracle is a matter of taste it seems. The default
method ("checkpoint/abort") is:

	alter system checkpoint;
	shutdown abort;

This should be the fastest safe way bring the instance down. If
you find "shutdown abort" distasteful, set this attribute to
"immediate" in which case we will

	shutdown immediate;

If you still think that there's even better way to shutdown an
Oracle instance we are willing to listen.
</longdesc>
<shortdesc lang="en">shutdown_method</shortdesc>
<content type="string" default="${OCF_RESKEY_shutdown_method_default}" />
</parameter>

</parameters>

<actions>
<action name="start" timeout="120s" />
<action name="stop" timeout="120s" />
<action name="status" timeout="5s" />
<action name="monitor" depth="0" timeout="30s" interval="120s" />
<action name="validate-all" timeout="5s" />
<action name="methods" timeout="5s" />
<action name="meta-data" timeout="5s" />
</actions>
</resource-agent>
END
}


#
# methods: print the operations this agent supports, one per line
#
oracle_methods() {
  printf '%s\n' \
    start \
    stop \
    status \
    monitor \
    dumpinstipc \
    showdbstat \
    cleanup \
    validate-all \
    methods \
    meta-data \
    usage
}

#
#	Run sqlplus (reading commands from stdin) as the Oracle owner.
#	If we are not the owner ourselves ($US differs from $ORACLE_OWNER),
#	switch user with su and source $ORA_ENVF first.
#
execsql() {
	if [ "$US" != "$ORACLE_OWNER" ]; then
		su - $ORACLE_OWNER -s /bin/sh -c ". $ORA_ENVF; sqlplus -S /nolog"
		return
	fi
	sqlplus -S /nolog
}

#
#	Run commands in the oracle admin sqlplus...
#
# sqlplus settings sent ahead of every query so that only the bare
# result rows come back (no feedback, no headings, no pagination)
common_sql_opts() {
	printf '%s\n' \
		'set feedback off' \
		'set heading off' \
		'set pagesize 0'
}
# Filter (stdin to stdout) which drops sqlplus chatter: connection
# banners, "ENV MSG:" lines and password expiry warnings
common_sql_filter() {
	grep -v \
		-e '^Connected' \
		-e '^ENV MSG:' \
		-e 'Your password will expire in'
}
# runsql <connect statement> <generator>...
# Feed sqlplus (via execsql) the connect statement, the common options
# and the output of every generator function named in the remaining
# arguments; the sqlplus output is passed through common_sql_filter.
runsql() {
	local login="$1"
	local gen
	shift 1
	{
		echo "$login"
		common_sql_opts
		for gen in "$@"; do
			$gen
		done
	} | execsql | common_sql_filter
}
# Run the named SQL generator functions connected as sysdba
dbasql() {
	local login="connect / as sysdba"
	runsql "$login" $*
}
# Run the named SQL generator functions connected as the monitoring
# user ($MONUSR/$MONPWD, both double-quoted in the connect statement)
monsql() {
	local login="connect \"$MONUSR\"/\"$MONPWD\""
	runsql "$login" $*
}
# dbasql_one: like dbasql, but keep only the last line of the output.
# Use it when the query should produce a single line: at times people
# stuff commands in the oracle .profile which may produce extra output.
dbasql_one() {
	dbasql $* |
		tail -n 1
}
# monsql_one: like monsql, but keep only the last line of the output
monsql_one() {
	monsql $* |
		tail -n 1
}

#
# various interesting sql
#
# SQL: query the instance status
dbstat() {
	printf '%s\n' 'select status from v$instance;'
}
# SQL: mount the database
dbmount() {
	printf '%s\n' 'alter database mount;'
}
# SQL: open the database
dbopen() {
	printf '%s\n' 'alter database open;'
}
# sqlplus command: shutdown immediate
dbstop_immediate() {
	printf '%s\n' 'shutdown immediate'
}
# sqlplus commands: force a checkpoint, then shutdown abort
dbstop_checkpoint_abort() {
	printf '%s\n' \
		'alter system checkpoint;' \
		'shutdown abort'
}
# Emit the shutdown commands selected by $shutdown_method.
# Prints nothing for any value other than the two supported ones.
dbstop() {
	if [ "${shutdown_method}" = "immediate" ]; then
		dbstop_immediate
	elif [ "${shutdown_method}" = "checkpoint/abort" ]; then
		dbstop_checkpoint_abort
	fi
}
# sqlplus command: start the instance
dbstart() {
	printf '%s\n' 'startup'
}
# sqlplus command: start the instance and mount the database
dbstart_mount() {
	printf '%s\n' 'startup mount'
}
# SQL: end the backup mode
dbendbackup() {
	printf '%s\n' 'alter database end backup;'
}
# SQL: count the v$backup rows with status ACTIVE; the result is
# prefixed with the literal COUNT
db_backup_mode() {
	printf '%s\n' "select 'COUNT'||count(*) from v\$backup where status='ACTIVE';"
}
# Succeeds if ocf_is_true accepts the value of $clear_backupmode
is_clear_backupmode_set(){
	ocf_is_true "$clear_backupmode"
	return $?
}
# Succeeds unless the db_backup_mode query reports exactly "COUNT0"
# (any other answer, including an empty one, counts as backup mode)
is_instance_in_backup_mode() {
	local active
	active=$(dbasql_one db_backup_mode | sed 's/COUNT//')
	test "x$active" != "x0"
}
# Run "alter database end backup" as sysdba and log whatever sqlplus
# printed at info level
clear_backup_mode() {
	local sqlout
	sqlout=$(dbasql dbendbackup)
	ocf_log info "Oracle instance $ORACLE_SID alter database end backup: $sqlout"
}
# SQL: query the user_dump_dest parameter
getdumpdest() {
	printf '%s\n' "select value from v\$parameter where name = 'user_dump_dest';"
}
# oradebug commands: attach to our own session, print the trace file
# name and dump the IPC information
getipc() {
	printf '%s\n' \
		"oradebug setmypid" \
		"oradebug tracefile_name" \
		"oradebug ipc"
}
# SQL: look up the monitoring profile ($MONPROFILE) in dba_profiles
show_mon_profile() {
	local query
	query="select PROFILE from dba_profiles where PROFILE='${MONPROFILE}';"
	echo "$query"
}
# SQL: create the monitoring profile with unlimited failed logins and
# unlimited password life time
mk_mon_profile() {
	printf '%s\n' \
		"create profile \"$MONPROFILE\" limit FAILED_LOGIN_ATTEMPTS UNLIMITED PASSWORD_LIFE_TIME UNLIMITED;"
}
# SQL: look up the monitoring user ($MONUSR) and its account status
show_mon_user() {
	local query
	query="select USERNAME, ACCOUNT_STATUS from dba_users where USERNAME='${MONUSR}';"
	echo "$query"
}
# SQL: create the monitoring user with the monitoring profile and grant
# it the right to connect and to read v_$instance
mk_mon_user() {
	printf '%s\n' \
		"create user \"$MONUSR\" identified by \"$MONPWD\" profile \"$MONPROFILE\";" \
		"grant create session to \"$MONUSR\";" \
		"grant select on v_\$instance to \"$MONUSR\";"
}
# SQL: query the profile currently assigned to the monitoring user
show_mon_user_profile() {
	local query
	query="select PROFILE from dba_users where USERNAME='${MONUSR}';"
	echo "$query"
}
# SQL: assign the monitoring profile to the monitoring user.
# The user and profile names are emitted as double-quoted identifiers,
# the same way mk_mon_user and mk_mon_profile create them. The inner
# quotes are escaped so that they reach sqlplus instead of terminating
# the shell string (which also left the values open to word splitting
# and globbing).
set_mon_user_profile() {
	printf '%s\n' "alter user \"$MONUSR\" profile \"$MONPROFILE\";"
}
# SQL: set the monitoring user's password again (used when the account
# is reported as EXPIRED).
# User name and password are emitted double-quoted, matching the
# "create user" statement in mk_mon_user and the connect string in
# monsql. The inner quotes are escaped so that they reach sqlplus and
# the password is not word-split or globbed by the shell.
reset_mon_user_password() {
	printf '%s\n' "alter user \"$MONUSR\" identified by \"$MONPWD\";"
}
# Make sure the monitoring profile exists, creating it if necessary.
# Returns:
#   0                    profile exists (possibly just created)
#   $OCF_ERR_CONFIGURED  creation failed with ORA-65140
#   1                    creation failed for any other reason
check_mon_profile() {
	local sqlout

	# already there?
	sqlout=$(dbasql show_mon_profile)
	echo "$sqlout" | grep -iw "^$MONPROFILE" >/dev/null && return 0

	# no: create it and look again
	sqlout=$(dbasql mk_mon_profile show_mon_profile)
	if echo "$sqlout" | grep -iw "^$MONPROFILE" >/dev/null; then
		return 0
	fi
	if echo "$sqlout" | grep ORA-65140 >/dev/null 2>&1; then
		ocf_exit_reason "monprofile must start with C## for container databases"
		return $OCF_ERR_CONFIGURED
	fi
	ocf_exit_reason "could not create $MONPROFILE oracle profile"
	ocf_log err "sqlplus output: $sqlout"
	return 1
}
# Make sure the monitoring user exists and uses the monitoring profile.
#
# An existing user gets its password reset if the account is reported
# as EXPIRED and is moved to $MONPROFILE if it has another profile.
# A missing user is created, but only if OCF_RESKEY_monpassword was
# set explicitly.
# Returns:
#   0                    user is in place
#   $OCF_ERR_CONFIGURED  creation failed with ORA-65096
#   1                    profile could not be set / user not created
# Exits with $OCF_ERR_CONFIGURED if the user has to be created and no
# password was configured.
check_mon_user() {
	local sqlout
	local sqlout2

	sqlout=$(dbasql show_mon_user)
	if echo "$sqlout" | grep -iw "^$MONUSR" >/dev/null; then
		# the user exists
		if echo "$sqlout" | grep -w "EXPIRED" >/dev/null; then
			dbasql reset_mon_user_password
		fi
		sqlout=$(dbasql show_mon_user_profile)
		if echo "$sqlout" | grep -iw "^$MONPROFILE" >/dev/null; then
			return 0
		fi
		# wrong profile: switch it and verify
		sqlout=$(dbasql set_mon_user_profile)
		sqlout2=$(dbasql show_mon_user_profile)
		if echo "$sqlout2" | grep -iw "^$MONPROFILE" >/dev/null; then
			return 0
		fi
		ocf_exit_reason "could not set profile for $MONUSR oracle user"
		ocf_log err "sqlplus output: $sqlout( $sqlout2 )"
		return 1
	fi

	# the user has to be created
	if [ -z "$OCF_RESKEY_monpassword" ]; then
		ocf_exit_reason "Please explicitly set a password for $MONUSR oracle user"
		exit $OCF_ERR_CONFIGURED
	fi

	sqlout=$(dbasql mk_mon_user show_mon_user)
	if echo "$sqlout" | grep -iw "^$MONUSR" >/dev/null; then
		return 0
	fi
	if echo "$sqlout" | grep ORA-65096 >/dev/null 2>&1; then
		ocf_exit_reason "monuser must start with C## for container databases"
		return $OCF_ERR_CONFIGURED
	fi
	ocf_exit_reason "could not create $MONUSR oracle user"
	ocf_log err "sqlplus output: $sqlout"
	return 1
}
#
# print the output of dbstat (for debugging): first the raw sqlplus
# output, then the filtered sysdba output enclosed in <>
#
showdbstat() {
	local stripped
	echo "Full output:"
	dbstat | execsql
	echo "Stripped output:"
	stripped=$(dbasql dbstat)
	echo "<$stripped>"
}

#
# IPC stuff: not overly complex, but quite involved :-/
#

# Part 1: Oracle
# Print the name of the companion file of a trace file: a trailing
# "trc" in $1 is replaced by "trm"; any other name is printed unchanged.
# The argument is quoted and printed with printf so that whitespace and
# glob characters in the path are preserved as given.
other_trace_junk() {
	printf '%s\n' "$1" | sed 's/trc$/trm/'
}
# Have Oracle dump its IPC information (getipc) and print the name of
# the trace file, which is expected on the 2nd output line as an
# absolute path ending in "trc".
# Returns 1 (after logging a warning) if no such line was found.
dumpinstipc() {
	local sqlout trcpath
	sqlout=$(dbasql getipc)
	trcpath=$(echo "$sqlout" | awk 'NR==2' | grep '^/.*trc$')
	if [ -z "$trcpath" ]; then
		ocf_log warn "'dbasql getipc' failed: $sqlout"
		return 1
	fi
	echo $trcpath
}
# parseipc <trace file>
# Extract the IPC object ids from an "oradebug ipc" trace file and print
# them one per line, shared memory ids as "m:<id>" and semaphore ids as
# "s:<id>" (each group sorted, duplicates removed).
# Returns 1 (after logging a warning) if $1 is not a regular file.
# The trace file and its ".trm" companion are appended to TMPFILES.
# NOTE(review): when parseipc runs inside a command substitution, as
# oracle_stop and oracle_cleanup call it, that TMPFILES assignment is
# made in a subshell and does not reach the caller - confirm whether the
# trace files are expected to be cleaned up in that case.
parseipc() {
	local inf=$1
	if [ ! -f "$1" ]; then
		ocf_log warn "$1: no such ipc trace file"
		return 1
	fi
	# shared memory: a line whose 3rd field is "Shmid" is a header;
	# take the 3rd field of the line which follows it, if numeric
	awk '
		$3 == "Shmid" {n=1;next}
		n {
			if( $3~/^[0-9]+$/ ) print $3;
			n=0
		}
	' $inf |
	sort -u | sed 's/^/m:/'
	# semaphores: after the "Semaphore List" line print every
	# all-digit field, up to and including the line which matches
	# "system semaphore information"
	awk '
		/Semaphore List/ {insems=1;next}
		insems {
			for( i=1; i<=NF; i++ )
				if( $i~/^[0-9]+$/ ) print $i;
		}
		/system semaphore information/ {exit}
	' $inf |
	sort -u | sed 's/^/s:/'
	TMPFILES="$TMPFILES $inf `other_trace_junk $inf`"
}

# Part 2: OS (ipcs,ipcrm)
# Filter for ipcs output: keep the lines which contain $ORACLE_OWNER as
# a word and print their second column
filteroraipc() {  # this portable?
	grep -w $ORACLE_OWNER |
		awk '{ print $2 }'
}
# Print a human readable name for an IPC object type letter
# (m, s or q); prints nothing for anything else
ipcdesc() {
	local kind=$1
	if [ "$kind" = m ]; then
		echo "shared memory segment"
	elif [ "$kind" = s ]; then
		echo "semaphore"
	elif [ "$kind" = q ]; then
		echo "message queue"
	fi
}
# rmipc <m|s|q> <id>
# Remove one IPC object, but only if ipcs lists that id among the
# objects which filteroraipc attributes to the Oracle owner.
# Returns the failed lookup's status if the id is not listed, else the
# status of ipcrm.
rmipc() {
	local kind=$1 objid=$2 found
	ipcs -$kind | filteroraipc | grep -iw $objid >/dev/null 2>&1
	found=$?
	if [ $found -ne 0 ]; then
		return $found
	fi
	ocf_log info "Removing $(ipcdesc $kind) $objid."
	ipcrm -$kind $objid
}
# Remove every shared memory segment, semaphore and message queue which
# filteroraipc attributes to the Oracle owner
ipcrm_orauser() {
	local kind objid
	for kind in m s q
	do
		for objid in $(ipcs -$kind | filteroraipc)
		do
			rmipc $kind $objid
		done
	done
}
# ipcrm_instance <type>:<id>...
# Remove the IPC objects given in the form printed by parseipc; the
# first colon of each argument is turned into a blank so that type and
# id reach rmipc as separate words
ipcrm_instance() {
	local ipcobj
	for ipcobj in "$@"; do
		rmipc $(echo $ipcobj | sed 's/:/ /')
	done
}

#
# oracle_status: is the Oracle instance running?
#
# quick check to see if the instance is up: look for a process whose
# name ends in _pmon_<SID> in the ps output (case-insensitive)
is_proc_running() {
	local pmon_re="[^ ]*[_]pmon_${ORACLE_SID}"
	ps -ef | grep -wiqs "$pmon_re"
}
# instance in OPEN state?
# Ask as the monitoring user first; if that does not yield OPEN, log a
# warning and ask again as sysdba. Returns 0 if either answer is OPEN,
# 1 otherwise.
instance_live() {
	local state
	state=$(monsql_one dbstat)
	if [ "$state" = OPEN ]; then
		return 0
	fi
	ocf_log warn "Unable to login as \"$MONUSR\", using \"sysdba\" user instead"
	state=$(dbasql_one dbstat)
	[ "$state" = OPEN ] && return 0
	ocf_log info "$ORACLE_SID instance state is not OPEN (dbstat output: $state)"
	return 1
}

# ora_cleanup [<type>:<id>...]
# Clean up after a stopped instance:
#  - remove the lock files $ORACLE_HOME/dbs/lk* whose name ends in
#    $ORACLE_SID (compared case-insensitively)
#  - remove IPC objects according to $IPCRM: nothing for "none", the
#    objects given as arguments for "instance", everything owned by the
#    Oracle user for "orauser"
ora_cleanup() {
	local lockf

	#rm -fr /tmp/.oracle #???
	# Walk the glob instead of parsing ls output: this keeps paths with
	# whitespace intact, never descends into a directory which happens
	# to match lk*, and stays quiet if no lock file exists.
	for lockf in "$ORACLE_HOME"/dbs/lk*; do
		# skips the unexpanded pattern and anything but regular files
		[ -f "$lockf" ] || continue
		if printf '%s\n' "$lockf" | grep -i "$ORACLE_SID\$" >/dev/null; then
			rm -f -- "$lockf"
		fi
	done

	case $IPCRM in
	none)
		;;
	instance)
		# unquoted on purpose: the caller may hand over the whole
		# parseipc output as a single argument
		ipcrm_instance $*
		;;
	orauser)
		ipcrm_orauser $*
		;;
	esac
}

# Hand sid, home and user to ora_common_getconfig, then set the
# agent-specific settings (IPCRM, shutdown_method, clear_backupmode)
# from the resource parameters, falling back to the defaults
oracle_getconfig() {
	ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user"

	IPCRM=${OCF_RESKEY_ipcrm:-${OCF_RESKEY_ipcrm_default}}
	shutdown_method=${OCF_RESKEY_shutdown_method:-${OCF_RESKEY_shutdown_method_default}}
	clear_backupmode=${OCF_RESKEY_clear_backupmode:-${OCF_RESKEY_clear_backupmode_default}}
}

#
# oracle_start: Start the Oracle instance
#
# Steps: bring the instance to the MOUNTED state, optionally end the
# online backup mode, open the database, make sure the monitoring
# profile and user exist, and finally verify that the instance is up.
#
# Returns $OCF_SUCCESS if the instance is (or already was) OPEN and
# $OCF_ERR_GENERIC on any failure.
#
# NOTE: We handle instance in the MOUNTED and STARTED states
# efficiently
# We *do not* handle instance in the restricted or read-only
# mode, i.e. it appears as running, but its availability is
# "not for general use"
#
# NOTE(review): check_mon_profile and check_mon_user may return
# $OCF_ERR_CONFIGURED, but any failure of theirs is reported here as
# $OCF_ERR_GENERIC - confirm that this is intended.
#

oracle_start() {
	local status output
	if is_proc_running; then
		# processes are there: act on the state the instance reports
		status="`dbasql_one dbstat`"
		case "$status" in
		"OPEN")
			: nothing to be done, we can leave right now
			ocf_log info "Oracle instance $ORACLE_SID already running"
			return $OCF_SUCCESS
		;;
		"STARTED")
			output=`dbasql dbmount`
		;;
		"MOUNTED")
			: we proceed if mounted
		;;
		*) # status unknown
			# stop it and start over up to the mounted state
			output=`dbasql dbstop dbstart_mount`
		;;
		esac
	else
		output="`dbasql dbstart_mount`"
		# try to cleanup in case of
		# ORA-01081: cannot start already-running ORACLE - shut it down first
		if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
			ocf_log info "ORA-01081 error found, trying to cleanup oracle (dbstart_mount output: $output)"
			ora_cleanup
			output=`dbasql dbstop_immediate`
			output=`dbasql dbstart_mount`
		fi
	fi

	# oracle instance should be mounted.
	status="`dbasql_one dbstat`"
	case "$status" in
	"MOUNTED")
		;;
	*)
		: error!!
		ocf_exit_reason "oracle $ORACLE_SID can not be mounted (status: $status)"
		return $OCF_ERR_GENERIC
		;;
	esac

	# It is examined whether mode is "online backup mode",
	# and if it is true, makes clear the mode.
	# Afterwards, DB is opened.
	if is_clear_backupmode_set && is_instance_in_backup_mode; then
		clear_backup_mode
	fi
	# $output is not checked right away; it is only reported if one of
	# the following checks fails
	output=`dbasql dbopen`

	# check/create the monitor profile
	if ! check_mon_profile; then
		# dbopen was failed if there is any $output
		[ -n "$output" ] && ocf_exit_reason "oracle $ORACLE_SID can not be opened: $output"
		return $OCF_ERR_GENERIC
	fi

	# check/create the monitor user
	if ! check_mon_user; then
		# dbopen was failed if there is any $output
		[ -n "$output" ] && ocf_exit_reason "oracle $ORACLE_SID can not be opened: $output"
		return $OCF_ERR_GENERIC
	fi

	# final verdict: processes present and instance reported as OPEN
	if ! is_proc_running; then
		ocf_exit_reason "oracle process not running: $output"
		return $OCF_ERR_GENERIC
	elif ! instance_live; then
		ocf_exit_reason "oracle instance $ORACLE_SID not started: $output"
		return $OCF_ERR_GENERIC
	else
		: cool, we are up and running
		ocf_log info "Oracle instance $ORACLE_SID started: $output"
		return $OCF_SUCCESS
	fi
}

#
# oracle_stop: Stop the Oracle instance
#
# Shuts the instance down with the configured method, terminates the
# processes which are still around and cleans up lock files and IPC
# objects. With ipcrm=instance the IPC ids are collected before the
# shutdown.
# Returns $OCF_SUCCESS if the instance is (or already was) stopped,
# $OCF_ERR_GENERIC if the processes are still there afterwards.
#
oracle_stop() {
	local status output ipc=""

	if ! is_proc_running; then
		ocf_log info "Oracle instance $ORACLE_SID already stopped"
		return $OCF_SUCCESS
	fi

	if [ "$IPCRM" = "instance" ]; then
		ipc=$(parseipc $(dumpinstipc))
	fi
	output=$(dbasql dbstop)

	# kill the procs if they hanged
	ocf_stop_processes TERM $PROCS_CLEANUP_TIME $(proc_pids)
	if is_proc_running; then
		ocf_exit_reason "Oracle instance $ORACLE_SID not stopped: $output"
		return $OCF_ERR_GENERIC
	fi

	ocf_log info "Oracle instance $ORACLE_SID stopped: $output"
	sleep 1  # give em a chance to cleanup
	ocf_log info "Cleaning up for $ORACLE_SID"
	ora_cleanup "$ipc"
	return $OCF_SUCCESS
}

#
# oracle_monitor: Can the Oracle instance do anything useful?
#
# Returns $OCF_NOT_RUNNING if the processes are not there,
# $OCF_ERR_GENERIC if they are but the instance is not OPEN, and
# $OCF_SUCCESS otherwise.
#
oracle_monitor() {
	if is_proc_running; then
		if instance_live; then
			#ocf_log info "Oracle instance $ORACLE_SID is alive"
			return $OCF_SUCCESS
		fi
		ocf_exit_reason "oracle instance $ORACLE_SID is down"
		return $OCF_ERR_GENERIC
	fi
	ocf_log info "oracle process not running"
	return $OCF_NOT_RUNNING
}

# other supported actions
# status: report on stdout whether the instance processes are there.
# Does not return: exits with $OCF_SUCCESS or $OCF_NOT_RUNNING.
oracle_status() {
	if ! is_proc_running; then
		echo Oracle instance $ORACLE_SID is stopped
		exit $OCF_NOT_RUNNING
	fi
	echo Oracle instance $ORACLE_SID is running
	exit $OCF_SUCCESS
}
# dumpinstipc: print the IPC objects of the running instance; fails
# with the status of is_proc_running if the instance is not running
oracle_dumpinstipc() {
	is_proc_running || return
	parseipc $(dumpinstipc)
}
# showdbstat: debugging aid, see showdbstat
oracle_showdbstat() { showdbstat; }
# cleanup: run ora_cleanup; with ipcrm=instance the IPC objects of the
# instance are looked up first and handed over as arguments
oracle_cleanup() {
	if [ "$IPCRM" != "instance" ]; then
		ora_cleanup
		return
	fi
	ora_cleanup $(parseipc $(dumpinstipc))
}
# validate-all: check shutdown_method and ipcrm against the supported
# values, then run the common validation.
# Returns $OCF_ERR_CONFIGURED for an unsupported setting, otherwise the
# status of ora_common_validate_all.
oracle_validate_all() {
	if [ "${shutdown_method}" != "immediate" ] &&
		[ "${shutdown_method}" != "checkpoint/abort" ]; then
		ocf_exit_reason "unsupported shutdown_method, please read meta-data"
		return $OCF_ERR_CONFIGURED
	fi

	if [ "${IPCRM}" != "none" ] &&
		[ "${IPCRM}" != "instance" ] &&
		[ "${IPCRM}" != "orauser" ]; then
		ocf_exit_reason "unsupported ipcrm setting, please read meta-data"
		return $OCF_ERR_CONFIGURED
	fi

	ora_common_validate_all
}

# used in ora-common.sh
# List (pid and command line) the processes whose command line ends in
# "ora<word characters><SID>", compared case-insensitively
show_procs() {
	local procs_re="[o]ra[a-zA-Z0-9_]*$ORACLE_SID$"
	ps -e -o pid,args | grep -i "$procs_re"
}
# Print the pids (first column) of the processes listed by show_procs
proc_pids() {
	show_procs |
		awk '{ print $1 }'
}
# passed to ocf_stop_processes in oracle_stop
PROCS_CLEANUP_TIME="30"

# Monitoring account settings: resource parameter if set, else default.
# The user and profile names are converted to upper case.
MONUSR=$(echo "${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default}" | awk '{print toupper($0)}')
MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default}
MONPROFILE=$(echo "${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default}" | awk '{print toupper($0)}')

OCF_REQUIRED_PARAMS="sid"
OCF_REQUIRED_BINARIES="sqlplus"

# hand control over to ocf_rarun with our command line arguments
ocf_rarun $*

#
# vim:tabstop=4:shiftwidth=4:textwidth=0:wrapmargin=0
