# @package      hubzero-submit-distributor
# @file         RemoteInstantSCRIPT.py
# @author       Steven Clark <clarks@purdue.edu>
# @copyright    Copyright (c) 2012-2014 HUBzero Foundation, LLC.
# @license      http://www.gnu.org/licenses/lgpl-3.0.html LGPLv3
#
# Copyright (c) 2012-2014 HUBzero Foundation, LLC.
#
# This file is part of: The HUBzero(R) Platform for Scientific Collaboration
#
# The HUBzero(R) Platform for Scientific Collaboration (HUBzero) is free
# software: you can redistribute it and/or modify it under the terms of
# the GNU Lesser General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# HUBzero is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# HUBzero is a registered trademark of HUBzero Foundation, LLC.
#

import re
import random
import logging

class RemoteInstantSCRIPT:
   """Build the shell wrapper script (and, for MPI, the node list) for a job.

   Two computation modes produce a script: 'mpi' and 'matlabpct'.  Each script
   is generated from an embedded shell template in which upper-case
   placeholders (WORKINGDIRECTORY, APPSCRIPTNAME, TS_START, ...) are replaced
   with the values given to the constructor.  Every other combination yields
   an empty script.
   """

   def __init__(self,
                hubUserName,
                hubUserId,
                runName,
                localJobId,
                instanceId,
                workingDirectory,
                appScriptName,
                isMultiCoreRequest,
                siteInfo,
                managerInfo,
                nNodes,
                ppn,
                timeHistoryLogs):
      """Store the job description used to fill in the script templates.

      hubUserName, hubUserId -- substituted for HUBUSERNAME / HUBUSERID
      runName                -- substituted for RUNNAME
      localJobId, instanceId -- substituted for JOBID / INSTANCEID and used to
                                name the script and machine list files
      workingDirectory       -- substituted for WORKINGDIRECTORY
      appScriptName          -- substituted for APPSCRIPTNAME
      isMultiCoreRequest     -- when false buildBatchScript() returns no script
      siteInfo               -- mapping with keys 'venue', 'venues', 'venueIndex'
      managerInfo            -- mapping with keys 'computationMode',
                                'preManagerCommands', 'managerCommand',
                                'postManagerCommands'
      nNodes, ppn            -- node count and entries per node; anything
                                int() accepts
      timeHistoryLogs        -- mapping with keys 'timestampStart',
                                'timestampFinish', 'timestampTransferred',
                                'timeResults'
      """
      self.logger               = logging.getLogger(__name__)
      self.hubUserName          = hubUserName
      self.hubUserId            = hubUserId
      self.runName              = runName
      self.localJobId           = localJobId
      self.instanceId           = instanceId
      self.workingDirectory     = workingDirectory
      self.appScriptName        = appScriptName
      self.isMultiCoreRequest   = isMultiCoreRequest
      self.computationMode      = managerInfo['computationMode']
      self.preManagerCommands   = managerInfo['preManagerCommands']
      self.managerCommand       = managerInfo['managerCommand']
      self.postManagerCommands  = managerInfo['postManagerCommands']
      self.nNodes               = nNodes
      self.ppn                  = ppn
      self.venue                = siteInfo['venue']
      self.venues               = siteInfo['venues']
      self.venueIndex           = siteInfo['venueIndex']
      self.timestampStart       = timeHistoryLogs['timestampStart']
      self.timestampFinish      = timeHistoryLogs['timestampFinish']
      self.timestampTransferred = timeHistoryLogs['timestampTransferred']
      self.timeResults          = timeHistoryLogs['timeResults']

      # Populated by __buildNodeList() when an MPI script is built.
      self.nodeFileName = ""
      self.nodeList     = []


   def __substitute(self,
                    template,
                    substitutions):
      """Apply (placeholder, value) pairs to template, strictly in order.

      Replacement is literal: values are never interpreted as regular
      expression replacement templates, so backslashes survive untouched.
      Order matters because some placeholders are substrings of others
      (MANAGERCOMMAND within PREMANAGERCOMMANDS) and because inserted text
      (e.g. the manager command) may itself contain later placeholders.
      """
      for placeholder,value in substitutions:
         # "%s" formatting accepts non-string values such as integer ids.
         template = template.replace(placeholder,"%s" % (value,))

      return(template)


   def __makeMPITemplate(self):
      """Return the bash template for an MPI job.

      A raw string is used because the shell case patterns contain
      backslash-escaped parentheses.
      """
      return r"""#!/bin/bash
# RemoteInstantSCRIPT:makeMPITemplate

trap cleanup HUP INT QUIT ABRT TERM

cleanup()
{
#  echo "master host = $(hostname --fqdn)" >> kill.out
   mpirunPidTree=$(pstree -l -p `jobs -p`)

   psString=${mpirunPidTree%%-[rs]sh(*}
#  echo "noSshPsString = ${psString}" >> kill.out
   noSshPids=""
   while [ true ] ; do
      case ${psString} in
         *\(*\)*)
            pid=${psString##*(}
            pid=${pid%%)*}
            noSshPids="${noSshPids} ${pid}"
            psString=${psString%(*}
            ;;
         *)
            break
            ;;
      esac
   done
#  echo "noSshPids = ${noSshPids}" >> kill.out

   psString=${mpirunPidTree#*-[rs]sh(}
   psString="(${psString}"
#  echo "sshPsString = ${psString}" >> kill.out
   sshPids=""
   while [ true ] ; do
      case ${psString} in
         *\(*\)*)
            pid=${psString##*(}
            pid=${pid%%)*}
            sshPids="${sshPids} ${pid}"
            psString=${psString%(*}
            ;;
         *)
            break
            ;;
      esac
   done
#  echo "sshPids = ${sshPids}" >> kill.out

   hosts=""
   for pid in ${sshPids} ; do
      command=$(ps --no-headers --format command ${pid})
      args=$(echo ${command} | cut -d' ' -f2-)
      for arg in ${args} ; do
         case ${arg} in
            -* )
               continue ;;
             * )
               host=${arg}
               hosts="${hosts} ${host}"
               break ;;
         esac
      done
   done
#  echo "hosts = ${hosts}" >> kill.out

   hostPids=""
   for host in ${hosts} ; do
      slavePid=$(ssh ${host} pgrep -f `pwd`/APPSCRIPTNAME)
      slavePsTree=$(ssh ${host} pstree -p ${slavePid})
      psString="(${slavePsTree#*(}"
      pid=${psString##*(}
      pid=${pid%%)*}
      hostPids="${hostPids} ${host}:${pid}"
   done
#  echo "hostPids = ${hostPids}" >> kill.out

   for hostPid in ${hostPids} ; do
      host=$(echo ${hostPid} | cut -d':' -f1)
      pid=$(echo ${hostPid} | cut -d':' -f2)
      ssh ${host} kill -TERM ${pid}
   done
   sleep 2
   for hostPid in ${hostPids} ; do
      host=$(echo ${hostPid} | cut -d':' -f1)
      pid=$(echo ${hostPid} | cut -d':' -f2)
      ssh ${host} kill -KILL ${pid}
   done

   for pid in ${noSshPids} ; do
      kill -0 ${pid} > /dev/null 2>&1
      if [ $? -eq 0 ] ; then
         kill -TERM ${pid}
         sleep 1
      fi
   done

   for pid in ${noSshPids} ; do
      kill -0 ${pid} > /dev/null 2>&1
      if [ $? -eq 0 ] ; then
         kill -KILL ${pid}
         sleep 1
      fi
   done

   if [ ! -s TS_FINISH ] ; then
      date +"%s" > TS_FINISH
   fi
   touch TIME_RESULTS-0
   cat TIME_RESULTS-[0-9]* >> TIME_RESULTS
   rm -f TIME_RESULTS-[0-9]*
}

exitStatus=0
# Change to directory where job was submitted.
cd WORKINGDIRECTORY
export PATH=WORKINGDIRECTORY:${PATH}

date +"%s" > TS_TRANSFERRED
date +"%s" > TS_START

PREMANAGERCOMMANDS
MANAGERCOMMAND `pwd`/APPSCRIPTNAME &
wait %1
exitStatus=$?
POSTMANAGERCOMMANDS

date +"%s" > TS_FINISH

touch TIME_RESULTS-0
cat TIME_RESULTS-[0-9]* >> TIME_RESULTS
rm -f TIME_RESULTS-[0-9]*

exit ${exitStatus}
"""


   def __buildMPIFile(self):
      """Return the MPI template with every placeholder filled in.

      Raises ValueError if nNodes or ppn cannot be converted with int().
      """
      nProcessors = str(int(self.nNodes)*int(self.ppn))

      # PREMANAGERCOMMANDS and POSTMANAGERCOMMANDS must precede MANAGERCOMMAND,
      # which is a substring of both.  The command placeholders come first so
      # that placeholders appearing inside the commands are expanded by the
      # later entries.
      substitutions = [("PREMANAGERCOMMANDS","\n".join(self.preManagerCommands)),
                       ("POSTMANAGERCOMMANDS","\n".join(self.postManagerCommands)),
                       ("MANAGERCOMMAND",self.managerCommand),
                       ("NNODES",self.nNodes),
                       ("NPROCESSORS",nProcessors),
                       ("APPSCRIPTNAME",self.appScriptName),
                       ("TS_TRANSFERRED",self.timestampTransferred),
                       ("TS_START",self.timestampStart),
                       ("TS_FINISH",self.timestampFinish),
                       ("TIME_RESULTS",self.timeResults),
                       ("RUNNAME",self.runName),
                       ("JOBID",self.localJobId),
                       ("INSTANCEID",self.instanceId),
                       ("WORKINGDIRECTORY",self.workingDirectory),
                       ("${PBS_NODEFILE}",self.nodeFileName),
                       ("HUBUSERNAME",self.hubUserName),
                       ("HUBUSERID",self.hubUserId)]

      return(self.__substitute(self.__makeMPITemplate(),substitutions))


   def __buildNodeList(self):
      """Set nodeFileName and rebuild nodeList for an MPI job.

      The list starts with ppn entries of self.venue.  Each of the remaining
      nNodes-1 nodes contributes ppn entries of a venue drawn at random from
      self.venues without repetition, initially excluding venueIndex.  When
      every candidate has been used the pool is refilled with all venues.

      Raises ValueError if additional nodes are needed but self.venues is empty.
      """
      self.nodeFileName = "%s_%s.machinelist" % (self.localJobId,self.instanceId)

      coresPerNode = int(self.ppn)
      nVenues      = len(self.venues)
      nodeList     = [self.venue]*coresPerNode

      eligible = [index for index in range(nVenues) if index != self.venueIndex]
      for _ in range(int(self.nNodes) - 1):
         if not eligible:
            # Refill before drawing so that a pool that starts out empty
            # (e.g. a single venue) does not break the random selection.
            if nVenues == 0:
               raise ValueError("no venues available for additional nodes")
            eligible = list(range(nVenues))
         index = random.choice(eligible)
         eligible.remove(index)
         nodeList.extend([self.venues[index]]*coresPerNode)

      self.nodeList = nodeList


   def __makeMatlabPCTTemplate(self):
      """Return the sh template for a 'matlabpct' job."""
      return """#!/bin/sh
# RemoteInstantSCRIPT:makeMatlabPCTTemplate

trap cleanup HUP INT QUIT ABRT TERM

cleanup()
{
#  echo "Abnormal termination by signal"
   kill -TERM `jobs -p`
   if [ ! -s TS_FINISH ] ; then
      date +"%s" > TS_FINISH
   fi
}

exitStatus=0
# Change to directory where job was submitted.
cd WORKINGDIRECTORY
export PATH=WORKINGDIRECTORY:${PATH}

date +"%s" > TS_TRANSFERRED
date +"%s" > TS_START

PREMANAGERCOMMANDS
`pwd`/APPSCRIPTNAME &
wait %1
exitStatus=$?
POSTMANAGERCOMMANDS

date +"%s" > TS_FINISH

touch TIME_RESULTS-0
cat TIME_RESULTS-[0-9]* >> TIME_RESULTS
rm -f TIME_RESULTS-[0-9]*

exit ${exitStatus}
"""


   def __buildMatlabPCTFile(self):
      """Return the 'matlabpct' template with every placeholder filled in."""
      substitutions = [("PREMANAGERCOMMANDS","\n".join(self.preManagerCommands)),
                       ("POSTMANAGERCOMMANDS","\n".join(self.postManagerCommands)),
                       ("APPSCRIPTNAME",self.appScriptName),
                       ("TS_TRANSFERRED",self.timestampTransferred),
                       ("TS_START",self.timestampStart),
                       ("TS_FINISH",self.timestampFinish),
                       ("TIME_RESULTS",self.timeResults),
                       ("WORKINGDIRECTORY",self.workingDirectory),
                       ("HUBUSERNAME",self.hubUserName),
                       ("HUBUSERID",self.hubUserId)]

      return(self.__substitute(self.__makeMatlabPCTTemplate(),substitutions))


   def buildBatchScript(self):
      """Return (batchLogName, batchScriptName, batchScript) for this job.

      A script is produced only for multi-core requests whose computation
      mode is 'mpi' or 'matlabpct'; it is named "<localJobId>_<instanceId>.cl".
      In every other case the script and its name are empty strings.  The log
      name is always an empty string.
      """
      batchLogName    = ""
      batchScriptName = ""
      # Default up front so unrecognized modes return an empty script instead
      # of failing on an unassigned local.
      batchScript     = ""

      if self.isMultiCoreRequest:
         if   self.computationMode == 'mpi':
            # The node list must exist first: its file name is substituted
            # into the MPI script.
            self.__buildNodeList()
            batchScript = self.__buildMPIFile()
            batchScriptName = "%s_%s.cl" % (self.localJobId,self.instanceId)
         elif self.computationMode == 'matlabpct':
            batchScript = self.__buildMatlabPCTFile()
            batchScriptName = "%s_%s.cl" % (self.localJobId,self.instanceId)
         elif self.computationMode not in ('parallel','matlabmpi'):
            self.logger.warning("Unrecognized computation mode %s, no batch script built",
                                self.computationMode)

      return(batchLogName,batchScriptName,batchScript)


   def getBatchNodeList(self):
      """Return (nodeFileName, nodeList) as set by the last MPI script build.

      Both are empty until buildBatchScript() has run in 'mpi' mode.
      """
      return(self.nodeFileName,self.nodeList)


