##
# VampirTrace
# http://www.tu-dresden.de/zih/vampirtrace
#
# Copyright (c) 2005-2013, ZIH, TU Dresden, Federal Republic of Germany
#
# Copyright (c) 1998-2005, Forschungszentrum Juelich, Juelich Supercomputing
#                          Centre, Federal Republic of Germany
#
# See the file COPYING in the package base directory for details
##

# This file is only meant to be used from within vtiofsl-start or
# vtiofsl-stop: refuse to continue when _VTIOFSL_EXENAME is empty or unset.
#
if [ -z "${_VTIOFSL_EXENAME}" ]; then
  echo "$0: Error: This script must be run inside vtiofsl-start or vtiofsl-stop"
  exit 1
fi

# Platform settings used by _vtiofsl_start_servers()
_VTIOFSL_CORES_PER_NODE=16	# passed to "aprun -d" for every server
_VTIOFSL_SERVER_APP="iofwd"	# server executable launched through aprun
_VTIOFSL_SERVER_INIT_TIME=15	# argument of the sleep after launching the servers

# BMI transport ("tcp" or "portals") and the port used in tcp BMI addresses
_VTIOFSL_BMI_TRANSPORT=tcp
_VTIOFSL_BMI_TCP_PORT=12345

# Computed variables
# All temporary files live in a ".iofsl" directory below VT_PFORM_GDIR or,
# if that is empty or unset, below the current working directory.
_VTIOFSL_TMP_PATH="${VT_PFORM_GDIR:-$PWD}/.iofsl"
# The path is quoted everywhere so that directories containing whitespace work.
mkdir -p "$_VTIOFSL_TMP_PATH"
if [[ ! -d "$_VTIOFSL_TMP_PATH" || ! -w "$_VTIOFSL_TMP_PATH" ]]; then
	_vtiofsl_error_msg "Cannot write to temporary path \"$_VTIOFSL_TMP_PATH\""
	exit 1
fi
# Per-job file names; BATCH_JOBID is taken from the environment.
_VTIOFSL_TMP_PREFIX="${_VTIOFSL_TMP_PATH}/iofsl.$BATCH_JOBID"
_VTIOFSL_NODE_FILE="${_VTIOFSL_TMP_PREFIX}.nodes"
_VTIOFSL_LOG_FILE="${_VTIOFSL_TMP_PREFIX}.log"
# File collecting "setenv" lines; make sure it exists even if nothing is
# appended to it.
_VTIOFSL_CSH_FILE="${_VTIOFSL_TMP_PREFIX}.csh"
touch "${_VTIOFSL_CSH_FILE}"
# Number of servers that are started by a single aprun invocation
_VTIOFSL_APRUN_CHUNK_SIZE=100

# Create a server config file from a config template.
#
# Parameters:
#   $1 file path to config source
#   $2 file path to created config file
#   $3 server bmi address
#   $4 rpc ip list, may be empty if not used
#
# Every occurrence of SERVER_ADDR in $1 is replaced by $3 and every
# occurrence of RPC_IP_LIST by $4; the result is written to $2.
# Returns the exit status of sed, or non-zero if $1 cannot be opened for
# reading or $2 cannot be opened for writing.
_vtiofsl_server_config_update() {
	local ESC_SA ESC_RPC

	# Escape the characters that are special in a sed replacement string
	# ('\', '/' as the delimiter used below, and '&').
	ESC_SA=$(printf '%s\n' "$3" | sed -e 's|[\\/&]|\\&|g')
	ESC_RPC=$(printf '%s\n' "$4" | sed -e 's|[\\/&]|\\&|g')

	# Both paths are quoted so that file names containing whitespace work.
	sed -e "s/SERVER_ADDR/$ESC_SA/g" -e "s/RPC_IP_LIST/$ESC_RPC/g" < "$1" > "$2"
}

#
# subroutine _vtiofsl_start_servers() - start the IOFSL servers
#
# Uses the settings defined at the top of this file and additionally reads
# _VTIOFSL_MODE, _VTIOFSL_NUM_SERVERS and prefix, which are expected to be
# set by the caller.
#
# On success:
#   - _VTIOFSL_SERVERS holds the comma separated BMI addresses of all servers
#   - _VTIOFSL_APRUN_PIDS holds the PIDs of the background aprun processes
#     and is exported for _vtiofsl_stop_servers()
#   - every variable exported here is also appended as a "setenv" line to
#     _VTIOFSL_CSH_FILE
#
# Returns 0 on success and 1 on error. Job control is suspended while the
# servers are launched and is restored on every return path.
#
_vtiofsl_start_servers()
{
	local _VTIOFSL_SUSPEND_JOBCONTROL=false
	local _VTIOFSL_START_STATUS=0

	case "$-" in
	*m*)	_VTIOFSL_SUSPEND_JOBCONTROL=true
		set +m
		_vtiofsl_verbose_msg 2 "Suspended job control."
		;;
	*)	;;
	esac

	_vtiofsl_start_servers_impl
	_VTIOFSL_START_STATUS=$?

	# Resume interactive mode if necessary, also after a failed launch
	if $_VTIOFSL_SUSPEND_JOBCONTROL; then
		set -m
	fi

	return $_VTIOFSL_START_STATUS
}

#
# private helper of _vtiofsl_start_servers() - does the actual launch,
# the caller takes care of suspending and resuming job control
#
_vtiofsl_start_servers_impl()
{
	local _VTIOFSL_SERVER_COUNTER=0
	local _VTIOFSL_APRUN_CHUNK_COUNTER=0
	# The aprun command line is collected in an array so that arguments
	# containing whitespace (e.g. the config file path) stay intact.
	local -a _VTIOFSL_APRUN_CMD=()
	local _VTIOFSL_SERVER_ID_NUMBER _VTIOFSL_SERVER_ID_STRING
	local _VTIOFSL_SERVER_IP_ADDR _VTIOFSL_SERVER_BMI
	local _VTIOFSL_CONFIG_FILE _VTIOFSL_RPC_IP

	# required for more than 1024 file handles
	export APRUN_XFER_LIMITS=1
	echo "setenv APRUN_XFER_LIMITS '1'" >> "${_VTIOFSL_CSH_FILE}"

	# get the node names
	if [ ! -e "$_VTIOFSL_NODE_FILE" ]; then
		# eventually this should be solved as a module dependency
		if ! hash rca-helper 2>/dev/null; then
			_vtiofsl_error_msg "Error: rca-helper required, try \"module load rca\" (should be done by the vampirtrace module). Aborting."
			return 1
		fi
		if ! aprun -n "$_VTIOFSL_NUM_SERVERS" -N 1 rca-helper -i > "$_VTIOFSL_NODE_FILE"; then
			_vtiofsl_error_msg "Error: There was an error launching aprun rca-helper to get the node file. Aborting."
			return 1
		fi
	fi
	# Only lines that consist of a number are taken as node ids. This is
	# done before the mode handling because the RPC list is built from it.
	_VTIOFSL_SERVER_NODE_NAMES=$(grep '^[0-9]\+$' "$_VTIOFSL_NODE_FILE")

	if [[ "$_VTIOFSL_MODE" == "MULTIFILE_SPLIT" ]]; then
		export ZOIDFS_ROUTING_MODE="ZOIDFS_STATIC_SERVER"
		echo "setenv ZOIDFS_ROUTING_MODE 'ZOIDFS_STATIC_SERVER'" >> "${_VTIOFSL_CSH_FILE}"
		# Each server knows only itself
		export ZOIDFS_NUM_IOSERVERS=1
		echo "setenv ZOIDFS_NUM_IOSERVERS '1'" >> "${_VTIOFSL_CSH_FILE}"
		export ZOIDFS_SERVER_RANK=0
		echo "setenv ZOIDFS_SERVER_RANK '0'" >> "${_VTIOFSL_CSH_FILE}"
		_VTIOFSL_SERVER_CONFIG_SOURCE="$prefix/etc/vtiofsl-platform-crayxk6-iofwd.cf"
		_VTIOFSL_RPC_IP_LIST=""
	else
		# there seems to be a problem either with iofwd or the config file
		# disabled for further investigation
		_vtiofsl_error_msg "Error: MULTIFILE mode currently not supported on crayxk6 playform. Aborting."
		return 1

		# NOTE: the rest of this branch is unreachable because of the
		# return above. It is kept for the time the mode gets enabled again.
		export ZOIDFS_NUM_IOSERVERS=$_VTIOFSL_NUM_SERVERS
		echo "setenv ZOIDFS_NUM_IOSERVERS '$_VTIOFSL_NUM_SERVERS'" >> "${_VTIOFSL_CSH_FILE}"
		# Use Multiserver atomic append config file
		_VTIOFSL_SERVER_CONFIG_SOURCE="$prefix/etc/vtiofsl-platform-crayxk6-iofwd-msaa.cf"

		# Prepare the RPC list (ip based) for MSAA
		for _VTIOFSL_SERVER_NODE_NAME in $_VTIOFSL_SERVER_NODE_NAMES; do
			_VTIOFSL_SERVER_NID=$(printf "nid%05i" "$_VTIOFSL_SERVER_NODE_NAME")

			_VTIOFSL_IP=$(getent hosts "$_VTIOFSL_SERVER_NID" | cut -d" " -f 1)
			# Not sure if that is the same port as BMI?
			_VTIOFSL_RPC_IP="${_VTIOFSL_IP}:${_VTIOFSL_BMI_TCP_PORT}"

			if [ -n "$_VTIOFSL_RPC_IP_LIST" ]; then
				_VTIOFSL_RPC_IP_LIST="${_VTIOFSL_RPC_IP_LIST}, "
			fi
			_VTIOFSL_RPC_IP_LIST="${_VTIOFSL_RPC_IP_LIST}\"${_VTIOFSL_RPC_IP}\""
		done
	fi

	_VTIOFSL_APRUN_PIDS=""
	_VTIOFSL_SERVERS=""
	local _VTIOFSL_COUNT=( $_VTIOFSL_SERVER_NODE_NAMES )
	# A mismatch is reported but not treated as fatal.
	if [ "$_VTIOFSL_NUM_SERVERS" -ne "${#_VTIOFSL_COUNT[@]}" ]; then
		_vtiofsl_error_msg "Error: Invalid number of servers found ${#_VTIOFSL_COUNT[@]}, expected $_VTIOFSL_NUM_SERVERS. See server node file $_VTIOFSL_NODE_FILE"
	fi
	_vtiofsl_verbose_msg 1 "Starting $_VTIOFSL_NUM_SERVERS iofwd servers"
	for _VTIOFSL_SERVER_NODE_NAME in $_VTIOFSL_SERVER_NODE_NAMES; do
		_VTIOFSL_SERVER_ID_NUMBER=$(printf "%i" "$_VTIOFSL_SERVER_NODE_NAME")
		_VTIOFSL_SERVER_ID_STRING=$(printf "nid%05i" "$_VTIOFSL_SERVER_NODE_NAME")

		case "$_VTIOFSL_BMI_TRANSPORT" in
			"tcp" )
			_VTIOFSL_SERVER_IP_ADDR=$(getent hosts "$_VTIOFSL_SERVER_ID_STRING" | cut -d" " -f 1)
			_VTIOFSL_SERVER_BMI="tcp://$_VTIOFSL_SERVER_IP_ADDR:$_VTIOFSL_BMI_TCP_PORT"
			;;
			"portals" )
			_VTIOFSL_SERVER_BMI="$_VTIOFSL_SERVER_ID_STRING:312"
			;;
		esac

		_vtiofsl_verbose_msg 2 "[#$_VTIOFSL_SERVER_COUNTER] $_VTIOFSL_SERVER_ID_NUMBER / $_VTIOFSL_SERVER_ID_STRING: bmi addr: $_VTIOFSL_SERVER_BMI"

		# Setup the config file for each server
		_VTIOFSL_CONFIG_FILE="${_VTIOFSL_TMP_PREFIX}.config.cf.pbs.${_VTIOFSL_SERVER_COUNTER}"
		_vtiofsl_server_config_update "$_VTIOFSL_SERVER_CONFIG_SOURCE" "$_VTIOFSL_CONFIG_FILE" "$_VTIOFSL_SERVER_BMI" "$_VTIOFSL_RPC_IP_LIST"

		if [ -n "$_VTIOFSL_SERVERS" ]; then
			_VTIOFSL_SERVERS="${_VTIOFSL_SERVERS},"
		fi
		_VTIOFSL_SERVERS="${_VTIOFSL_SERVERS}${_VTIOFSL_SERVER_BMI}"

		# Prepare chunked parts of the APRUN Line, the entries of one
		# chunk are separated by ":"
		if [ "${#_VTIOFSL_APRUN_CMD[@]}" -gt 0 ]; then
			_VTIOFSL_APRUN_CMD+=( ":" )
		fi
		if [[ "$_VTIOFSL_MODE" != "MULTIFILE_SPLIT" ]]
		then
			# FIXME THIS DOESN'T WORK WITH CHUNKING ?!
			export ZOIDFS_SERVER_RANK=$_VTIOFSL_SERVER_COUNTER
			echo "setenv ZOIDFS_SERVER_RANK '$_VTIOFSL_SERVER_COUNTER'" >> "${_VTIOFSL_CSH_FILE}"
		fi

		# $_VTIOFSL_SERVER_APP is left unquoted to keep the word
		# splitting the string based command line applied to it.
		_VTIOFSL_APRUN_CMD+=( -n 1 -N 1 -d "$_VTIOFSL_CORES_PER_NODE" -L "$_VTIOFSL_SERVER_ID_NUMBER" $_VTIOFSL_SERVER_APP --config "$_VTIOFSL_CONFIG_FILE" )

		_VTIOFSL_SERVER_COUNTER=$(( _VTIOFSL_SERVER_COUNTER + 1 ))
		if (( _VTIOFSL_SERVER_COUNTER % _VTIOFSL_APRUN_CHUNK_SIZE == 0 )); then
			# start the server in the background and save the PID
			aprun "${_VTIOFSL_APRUN_CMD[@]}" &> "${_VTIOFSL_LOG_FILE}.${_VTIOFSL_APRUN_CHUNK_COUNTER}" &
			_VTIOFSL_APRUN_PIDS="$_VTIOFSL_APRUN_PIDS $!"
			_VTIOFSL_APRUN_CHUNK_COUNTER=$(( _VTIOFSL_APRUN_CHUNK_COUNTER + 1 ))
			_VTIOFSL_APRUN_CMD=()
		fi
	done

	# Start the last chunk, but only if there is something left to start
	# (nothing is left if the number of servers is a multiple of the
	# chunk size). A failing aprun cannot be detected here because it is
	# started in the background; this is covered by the check below.
	if [ "${#_VTIOFSL_APRUN_CMD[@]}" -gt 0 ]; then
		aprun "${_VTIOFSL_APRUN_CMD[@]}" &> "${_VTIOFSL_LOG_FILE}.${_VTIOFSL_APRUN_CHUNK_COUNTER}" &
		_VTIOFSL_APRUN_PIDS="$_VTIOFSL_APRUN_PIDS $!"
	fi

	_vtiofsl_verbose_msg 2 "Sleeping for $_VTIOFSL_SERVER_INIT_TIME to wait for servers to initialize"
	sleep "$_VTIOFSL_SERVER_INIT_TIME"

	# Check if servers are all running
	_VTIOFSL_APRUN_CHUNK_COUNTER=0
	for _VTIOFSL_APRUN_PID in $_VTIOFSL_APRUN_PIDS; do
		# signal 0 only tests whether the process can be signalled
		if ! kill -s 0 "$_VTIOFSL_APRUN_PID"; then
			_vtiofsl_error_msg "Error: Aprun proccess died unexpectedly (see ${_VTIOFSL_LOG_FILE}.${_VTIOFSL_APRUN_CHUNK_COUNTER}). Aborting."
			return 1
		fi
		_VTIOFSL_APRUN_CHUNK_COUNTER=$(( _VTIOFSL_APRUN_CHUNK_COUNTER + 1 ))
	done

	# For later use when shutting down.
	export _VTIOFSL_APRUN_PIDS
	echo "setenv _VTIOFSL_APRUN_PIDS '$_VTIOFSL_APRUN_PIDS'" >> "${_VTIOFSL_CSH_FILE}"

	return 0
}

#
# subroutine _vtiofsl_stop_servers() - stop running IOFSL servers
#
# For every PID listed in _VTIOFSL_APRUN_PIDS the child process(es) reported
# by pgrep get a SIGINT (signal 2). After all signals have been sent, every
# aprun PID is waited for. Always returns 0.
#
_vtiofsl_stop_servers()
{
	_vtiofsl_verbose_msg 1 "Killing the iofsl servers... (SIGINT)"

	# First pass: signal the children of all aprun processes
	# (an alternative would be: pkill -s 2 -P $_VTIOFSL_APRUN_PID)
	for _VTIOFSL_APRUN_PID in $_VTIOFSL_APRUN_PIDS; do
		_VTIOFSL_CHILD_PID=$(pgrep -P $_VTIOFSL_APRUN_PID)

		# nothing to signal for this aprun process
		if [ -z "$_VTIOFSL_CHILD_PID" ]; then
			_vtiofsl_error_msg "Error: Cannot find child of aprun process $_VTIOFSL_APRUN_PID, already dead?"
			continue
		fi

		# unquoted on purpose: pgrep may have reported several children
		if ! kill -s 2 $_VTIOFSL_CHILD_PID; then
			_vtiofsl_error_msg "Error: Could not send SIGINT to child process $_VTIOFSL_CHILD_PID, already dead?"
		else
			_vtiofsl_verbose_msg 2 "Sent SIGINT to aprun child process $_VTIOFSL_CHILD_PID"
		fi
	done

	# Second pass: wait until the aprun processes are gone
	_vtiofsl_verbose_msg 1 "Waiting for iofsl servers to shutdown"
	for _VTIOFSL_APRUN_PID in $_VTIOFSL_APRUN_PIDS; do
		_vtiofsl_verbose_msg 2 "Waiting for aprun process $_VTIOFSL_APRUN_PID"
		# Errors of wait are discarded. A "fg 2> /dev/null" used to be
		# needed here for interactive shells with job control; with job
		# control disabled it is not necessary anymore.
		wait $_VTIOFSL_APRUN_PID 2> /dev/null
	done

	_vtiofsl_verbose_msg 1 "iofsl shutdown complete."

	return 0
}

