# @package      hubzero-submit-client
# @file         ProbeLauncher.py
# @author       Steven Clark <clarks@purdue.edu>
# @copyright    Copyright (c) 2004-2017 HUBzero Foundation, LLC.
# @license      http://opensource.org/licenses/MIT MIT
#
# Copyright (c) 2004-2017 HUBzero Foundation, LLC.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# HUBzero is a registered trademark of HUBzero Foundation, LLC.
#
import sys
import os
import logging
import shutil
import stat
import copy
import subprocess
import shlex
import select
import signal
import random
import re
import time
import traceback
from errno import EINTR

from hubzero.submit.LogMessage         import getLogIDMessage as getLogMessage
from hubzero.submit.ParameterTemplate  import ParameterTemplate
from hubzero.submit.RemoteProbeMonitor import RemoteProbeMonitor
from hubzero.submit.Probe              import Probe
from hubzero.submit.ProbeQueue         import ProbeQueue

class ProbeLauncher:
   # Default directory holding the launcher configuration file
   CONFIGURATIONDIRECTORY = os.path.join(os.sep,'etc','submit')
   # Name of the configuration file read by configure()
   CONFIGURATIONFILE      = 'submit-probe.conf'
   # Default log directory, joined onto probeRoot in __init__
   PROBELOGLOCATION       = "log"
   # Default name of the log file created inside the log directory
   PROBELOGFILE           = "probeLauncher.log"

   def __init__(self,
                probeRoot,
                probeLogLocation=PROBELOGLOCATION,
                probeLogFile=PROBELOGFILE,
                configurationDirectory=CONFIGURATIONDIRECTORY):
      """
      Open the log file, initialise launcher state and install signal handlers.

      probeRoot              - root directory; the log directory is created
                               beneath it and configure() resolves the
                               configured file names against it
      probeLogLocation       - log directory name, relative to probeRoot
      probeLogFile           - log file name inside the log directory
      configurationDirectory - directory containing CONFIGURATIONFILE

      No configuration is read here; configure() must be called separately.
      """
      # Logging is opened first so later steps can report through self.logger
      logDirectory = os.path.join(probeRoot,probeLogLocation)
      self.openLogger(logDirectory,probeLogFile)
      self.logger = logging.getLogger(__name__)

      self.probeRoot           = probeRoot
      self.configFilePath      = os.path.join(configurationDirectory,self.CONFIGURATIONFILE)
      self.remoteProbeMonitors = []
      self.configData          = {}

      # Bash script template used to probe one site.  The @@{NAME} markers are
      # placeholders - presumably filled in through ParameterTemplate before
      # the script is run; that code is not visible here, TODO confirm.
      # NOTE(review): this is a regular (non-raw) string literal, so the
      # backslash-newline pairs on the submit command act as Python line
      # continuations and the \$ sequences rely on Python keeping
      # unrecognised escapes verbatim.
      self.rawProbeSiteShellTemplate = """#!/bin/bash
#
trap cleanup HUP INT QUIT ABRT TERM

cleanup()
{
   echo "Abnormal termination by signal"
   pkill -TERM -P $$
}

if [ -n "${PEGASUSVERSION}" ] ; then
   . /etc/environ.sh
   use -e -r pegasus-${PEGASUSVERSION}
fi
exitStatus=0

gridProbeRoot=@@{GRIDPROBEROOT}
probeDirectory=@@{PROBEDIRECTORY}
remoteVenue=@@{REMOTEVENUE}
probeId=@@{PROBEID}
probeProgram=@@{PROBEPROGRAM}
nCores=@@{NCORES}
wallTime=@@{WALLTIME}
inputs="@@{INPUTS}"
preSubmitCommands="@@{PRESUBMITCOMMANDS}"
postSubmitCommands="@@{POSTSUBMITCOMMANDS}"

coreArg=
if [ ${nCores} -gt -1 ] ; then
   coreArg="-nCpus ${nCores}"
fi

wallTimeArg="-wallTime 10"
if [ -n "${wallTime}" ] ; then
   wallTimeArg="-w ${wallTime}"
fi
x509SubmitProxyArg=
if [ -f ~/.globus/probe.proxy ] ; then
   x509SubmitProxyArg="--x509SubmitProxy ~/.globus/probe.proxy"
fi

#echo gridProbeRoot=${gridProbeRoot}
#echo probeDirectory=${probeDirectory}
#echo remoteVenue=${remoteVenue}
#echo probeId=${probeId}
#echo probeProgram=${probeProgram}
#echo nCores=${nCores}
#echo wallTime=${wallTime}
#echo inputs=${inputs}
#echo preSubmitCommands=${preSubmitCommands}
#echo postSubmitCommands=${postSubmitCommands}

cd ${gridProbeRoot}
mkdir -p ${probeDirectory}/${remoteVenue}/${probeId}
exitStatus=$?
if [ ${exitStatus} -ne 0 ] ; then
   exitStatus=7
else
   cd ${probeDirectory}/${remoteVenue}/${probeId}
   exitStatus=$?
   if [ ${exitStatus} -ne 0 ] ; then
      exitStatus=7
   else
      if [ ${probeProgram} = "./probegridsite.sh" ] ; then
         cat >> probegridsite.sh <<EOF
#!/bin/sh
#
exitStatus=0

free -m
printenv | sort 

pwd
ls -lsa

if [ "\${SITE_EXPECT_HOME_DIR}" = "" -o "\${SITE_EXPECT_HOME_DIR}" = "YES" ] ; then
   if [ "\${HOME}" != "" ] ; then
      cd \${HOME}
      exitStatus=\$?

      if [ \$exitStatus -eq 0 ] ; then
         quota -s
         exitStatus=\$?
         if [ \$exitStatus -ne 0 ] ; then
            quota
            exitStatus=\$?
            if [ \$exitStatus -ne 0 ] ; then
               du -sh
               exitStatus=\$?
            fi
         fi
      else
         exitStatus=6
      fi
   fi
fi

exit \$exitStatus
EOF
         chmod +x probegridsite.sh
      fi

      if [ -n "${preSubmitCommands}" ] ; then
         oIFS=$IFS
         IFS=,
         for preSubmitCommand in ${preSubmitCommands} ; do
            eval ${preSubmitCommand}
         done
         IFS=$oIFS
      fi

      inputArg=
      if [ -n "${inputs}" ] ; then
         oIFS=$IFS
         IFS=,
         for input in ${inputs} ; do
            inputArg="${inputArg} -i ${input}"
         done
         IFS=$oIFS
      fi

      ${SUBMITPATH} --venue ${remoteVenue} \
                    ${coreArg} \
                    ${wallTimeArg} \
                    ${x509SubmitProxyArg} \
                    --disableProbeCheck \
                    ${inputArg} \
                    ${probeProgram} > ${probeId}.stdout 2> ${probeId}.stderr &
      wait %1
      exitStatus=$?

      if [ -n "${postSubmitCommands}" ] ; then
         oIFS=$IFS
         IFS=,
         for postSubmitCommand in ${postSubmitCommands} ; do
            eval ${postSubmitCommand}
         done
         IFS=$oIFS
      fi

      if [ ! -s ${probeId}.stdout ] ; then
         rm -f ${probeId}.stdout
      fi
      if [ ! -s ${probeId}.stderr ] ; then
         rm -f ${probeId}.stderr
      fi

      if [ "${jobId}" = "" ] ; then
         for f in `ls *.stdout 2> /dev/null` ; do
            id=${f%.stdout}
            if [ ${id} != ${probeId} ] ; then
               jobId=${id}
               break
            fi
         done
      fi
      if [ "${jobId}" = "" ] ; then
         for f in `ls *.stderr 2> /dev/null` ; do
            id=${f%.stderr}
            if [ ${id} != ${probeId} ] ; then
               jobId=${id}
               break
            fi
         done
      fi

      if [ "${jobId}" != "" ] ; then
         if [ ${exitStatus} -eq 0 ] ; then
            if [ -s ${jobId}.FAILURE ] ; then
               if [ $(grep -c stagein ${jobId}.FAILURE) -gt 0 ] ; then
                  exitStatus=11
               fi
               if [ $(grep -c stageout ${jobId}.FAILURE) -gt 0 ] ; then
                  exitStatus=12
               fi
               if [ $(grep -c 'job failed to start execution within the specified time limit' ${jobId}.FAILURE) -gt 0 ] ; then
                  exitStatus=24
               fi
            fi
         fi

         rm -f ${jobId}.sh ${jobId}_input.tar.gz

         if [ ! -s ${jobId}.stderr ] ; then
            rm -f ${jobId}.stderr
         fi
         if [ ! -s ${jobId}.stdout ] ; then
            rm -f ${jobId}.stdout
         fi
      fi

      if [ -s .submit.log ] ; then
         if [ ${exitStatus} -eq 0 ] ; then
            if [ $(grep -c 'Session authentication failed' .submit.log) -gt 0 ] ; then
               echo 'Session authentication failed' >> ${probeId}.stderr
               exitStatus=4
            fi
         fi
         if [ ${exitStatus} -eq 0 ] ; then
            if [ $(grep -c 'You cannot exceed the one-day job limit' .submit.log) -gt 0 ] ; then
               grep 'You cannot exceed the one-day job limit' .submit.log >> ${probeId}.stderr
               exitStatus=5
            fi
         fi
      fi
   fi
fi

exit ${exitStatus}
"""

      # Chunk size used for Popen buffering and os.read() in executeProbeCommand
      self.bufferSize = 4096

      # launchMaster selects which branch sigGEN_handler takes; the
      # Terminating/Terminated pairs are set by that handler
      self.launchMaster            = True
      self.launchMasterTerminated  = False
      self.launchMasterTerminating = False

      self.probeLaunchTerminated  = False
      self.probeLaunchTerminating = False

      # pid of the command currently run by executeProbeCommand, 0 when none
      self.commandPid = 0
      # site name -> settings dictionary, filled by loadProbeSitesInfo
      self.probeSites = {}
      # names of sites that disappeared from the sites file on a reload
      self.probeSitesRetired = []
      # reload request flag (set by SIGINT) and its in-progress guard
      self.reloadProbeSitesInfo = True
      self.reloadingProbeSitesInfo = False
      # file descriptor -> (probeId, probeSite, childPid, launchTime)
      self.remoteProbeJobInfo = {}

      self.toBeScheduledQueue = ProbeQueue()
      self.inProgressQueue    = ProbeQueue()

      # high-water marks updated by scanProbeHistory
      self.maxProbeId      = -1
      self.maxJobFinished  = -1
      self.maxProbeStopped = -1

      # SIGINT requests a probe site reload; the other signals are routed
      # through sigGEN_handler by their individual handlers
      signal.signal(signal.SIGINT,self.sigINT_handler)
      signal.signal(signal.SIGHUP,self.sigHUP_handler)
      signal.signal(signal.SIGQUIT,self.sigQUIT_handler)
      signal.signal(signal.SIGABRT,self.sigABRT_handler)
      signal.signal(signal.SIGTERM,self.sigTERM_handler)


   def sigGEN_handler(self,
                      signalType,
                      frame):
      """
      Common termination path used by the HUP, QUIT, ABRT and TERM handlers.

      When this process is the launch master the to-be-scheduled queue is
      purged and SIGTERM is sent to every child recorded in
      remoteProbeJobInfo.  Otherwise SIGTERM is sent to the command currently
      being run by executeProbeCommand, if there is one.  In both cases the
      matching "terminated" flag is set before returning.

      signalType - signal number delivered (not used)
      frame      - interrupted stack frame (not used)

      A child that has already exited makes os.kill() raise OSError; that is
      logged and skipped so the remaining children are still signalled and
      the termination flags are always updated.
      """
      if self.launchMaster:
         self.launchMasterTerminating = True
         self.toBeScheduledQueue.purge()
         # iterate over a snapshot of the job table
         for probeId,probeSite,childPid,launchTime in list(self.remoteProbeJobInfo.values()):
            self.logger.log(logging.INFO,getLogMessage("Send TERM to child site %s process(%d)" % (probeSite,childPid)))
            try:
               os.kill(childPid,signal.SIGTERM)
            except OSError as e:
               self.logger.log(logging.ERROR,getLogMessage("Send TERM to child site %s process(%d) failed: %s" % \
                                                                                      (probeSite,childPid,e)))
         self.logger.log(logging.INFO,getLogMessage("probe launcher stopped"))
         self.launchMasterTerminated = True
         self.launchMasterTerminating = False
      else:
         self.probeLaunchTerminating = True
         commandPid = self.commandPid
         if commandPid:
            self.logger.log(logging.INFO,getLogMessage("Send TERM to child submit process(%d)" % (commandPid)))
            try:
               os.kill(commandPid,signal.SIGTERM)
            except OSError as e:
               self.logger.log(logging.ERROR,getLogMessage("Send TERM to child submit process(%d) failed: %s" % \
                                                                                      (commandPid,e)))
            self.logger.log(logging.INFO,getLogMessage("probe launched process stopped"))
         self.probeLaunchTerminated = True
         self.probeLaunchTerminating = False


   def sigINT_handler(self,
                      signal,
                      frame):
      """
      SIGINT handler: log the signal and request a reload of the probe
      site information.  The request is not recorded while a reload is
      already in progress.
      """
      notice = getLogMessage("Received SIGINT!")
      self.logger.info(notice)
      if self.reloadingProbeSitesInfo:
         return
      self.reloadProbeSitesInfo = True

   def sigHUP_handler(self,
                      signal,
                      frame):
      """
      SIGHUP handler: log the signal, then run the common termination
      path in sigGEN_handler.
      """
      notice = getLogMessage("Received SIGHUP!")
      self.logger.info(notice)
      self.sigGEN_handler(signal,frame)

   def sigQUIT_handler(self,
                       signal,
                       frame):
      """
      SIGQUIT handler: log the signal, then run the common termination
      path in sigGEN_handler.
      """
      notice = getLogMessage("Received SIGQUIT!")
      self.logger.info(notice)
      self.sigGEN_handler(signal,frame)

   def sigABRT_handler(self,
                       signal,
                       frame):
      """
      SIGABRT handler: log the signal, then run the common termination
      path in sigGEN_handler.
      """
      notice = getLogMessage("Received SIGABRT!")
      self.logger.info(notice)
      self.sigGEN_handler(signal,frame)

   def sigTERM_handler(self,
                       signal,
                       frame):
      """
      SIGTERM handler: log the signal, then run the common termination
      path in sigGEN_handler.
      """
      notice = getLogMessage("Received SIGTERM!")
      self.logger.info(notice)
      self.sigGEN_handler(signal,frame)


   def openLogger(self,
                  logDirectory,
                  probeLogFile):
      class EmptyFilter(logging.Filter):
         """
         This is a filter which rejects empty messages

         """

         def filter(self,record):
            if record.getMessage() == "":
               emptyRecord = True
            else:
               emptyRecord = False

            return(not emptyRecord)

      APPLICATIONLOGGER = logging.getLogger('')
      APPLICATIONLOGGER.setLevel(logging.DEBUG)

      if not os.path.isdir(logDirectory):
         os.makedirs(logDirectory)

      probeLogPath = os.path.join(logDirectory,probeLogFile)
      logHandler = logging.FileHandler(probeLogPath)
      self.fdLogFile = logHandler.stream.fileno()

      emptyFilter = EmptyFilter()
      logHandler.addFilter(emptyFilter)

      logFormatter = logging.Formatter('%(asctime)s %(message)s','[%a %b %d %H:%M:%S %Y]')
      logHandler.setFormatter(logFormatter)
      APPLICATIONLOGGER.addHandler(logHandler)


   def configure(self):
      configured = False

      sectionPattern  = re.compile('(\s*\[)([^\s]*)(]\s*)')
      keyValuePattern = re.compile('( *)(\w*)( *= *)(.*[^\s$])( *)')
      commentPattern  = re.compile('\s*#.*')
      inProbeSection  = False

      try:
         fpConfig = open(self.configFilePath,'r')
         try:
            eof = False
            while not eof:
               record = fpConfig.readline()
               if record != "":
                  record = commentPattern.sub("",record)
                  if   sectionPattern.match(record):
                     sectionName = sectionPattern.match(record).group(2)
                     inProbeSection = (sectionName == 'probe')
                     if inProbeSection:
                        self.configData = {'probeDirectory':"Probes",
                                           'completedDumpFileName':"probeLauncher.dump",
                                           'inProgressDumpFileName':"probeLauncher_inProgress.dump",
                                           'toBeScheduledDumpFileName':"probeLauncher_toBeScheduled.dump",
                                           'timeBetweenProbes':120,
                                           'maximumTimeToProbeCompletion':360,
                                           'probeSitesFile':"probeSites.dat",
                                           'probeSiteShell':"./probesite.sh",
                                           'submitPath':"/usr/bin/submit",
                                           'pegasusVersion':"4.5.2",
                                           'remoteProbeMonitorHosts':[]
                                          }
                  elif inProbeSection:
                     if keyValuePattern.match(record):
                        key,value = keyValuePattern.match(record).group(2,4)
                        if key in self.configData:
                           if   isinstance(self.configData[key],list):
                              self.configData[key] = [e.strip() for e in value.split(',')]
                           elif isinstance(self.configData[key],bool):
                              self.configData[key] = bool(value.lower() == 'true')
                           elif isinstance(self.configData[key],float):
                              self.configData[key] = float(value)
                           elif isinstance(self.configData[key],int):
                              self.configData[key] = int(value)
                           elif isinstance(self.configData[key],dict):
                              try:
                                 sampleKey   = list(self.configData[key].keys())[0]
                                 sampleValue = self.configData[key][sampleKey]
                              except:
                                 sampleKey   = "key"
                                 sampleValue = "value"
                              self.configData[key] = {} 
                              for e in value.split(','):
                                 dictKey,dictValue = e.split(':')
                                 if isinstance(sampleKey,int):
                                    dictKey = int(dictKey)
                                 if   isinstance(sampleValue,int):
                                    dictValue = int(dictValue)
                                 elif isinstance(sampleValue,float):
                                    dictValue = float(dictValue)
                                 elif isinstance(sampleValue,bool):
                                    dictValue = bool(dictValue.lower() == 'true')
                                 self.configData[key][dictKey] = dictValue
                           else:
                              self.configData[key] = value
                        else:
                           self.logger.log(logging.WARNING,getLogMessage("Undefined key = value pair %s = %s" % (key,value)))
               else:
                  eof = True
         except (IOError,OSError):
            self.logger.log(logging.ERROR,getLogMessage("%s could not be read" % (self.configFilePath)))
         finally:
            fpConfig.close()
      except (IOError,OSError):
         self.logger.log(logging.ERROR,getLogMessage("%s could not be opened" % (self.configFilePath)))

      self.probeSitesPath            = os.path.join(self.probeRoot,self.configData['probeSitesFile'])
      self.completedDumpFilePath     = os.path.join(self.probeRoot,self.configData['completedDumpFileName'])
      self.inProgressDumpFilePath    = os.path.join(self.probeRoot,self.configData['inProgressDumpFileName'])
      self.toBeScheduledDumpFilePath = os.path.join(self.probeRoot,self.configData['toBeScheduledDumpFileName'])

      del self.remoteProbeMonitors
      self.remoteProbeMonitors = []
      for remoteProbeMonitorHost in self.configData['remoteProbeMonitorHosts']:
         remoteProbeMonitor = RemoteProbeMonitor(remoteProbeMonitorHost)
         self.remoteProbeMonitors.append(remoteProbeMonitor)

      configured = True

      return(configured)


   def daemonize(self):
      """
      Detach the process from its parent and terminal.

      Unless the log file already is stdout, stdin is redirected to /dev/null
      and stdout/stderr to the log file descriptor.  The process then forks,
      starts a new session in the child with setsid() and forks again.  The
      original process waits for the first child and exits, the first child
      exits immediately, and only the second child returns from this method
      (after a two second pause).
      """
      if self.fdLogFile != sys.stdout.fileno():
         try:
            # the with block closes the extra /dev/null descriptor once it
            # has been duplicated onto stdin
            with open("/dev/null",'r') as devnull:
               try:
                  os.dup2(devnull.fileno(),sys.stdin.fileno())
                  os.dup2(self.fdLogFile,sys.stdout.fileno())
                  os.dup2(self.fdLogFile,sys.stderr.fileno())
               except OSError:
                  self.logger.log(logging.ERROR,getLogMessage("file descriptor dup failed"))
         except (IOError,OSError):
            self.logger.log(logging.ERROR,getLogMessage("%s could not be opened" % ("/dev/null")))

      if os.fork() != 0:
         # original process: reap the intermediate child, then leave
         os.wait()
         sys.exit(0)

      # first child: new session, then fork again and exit
      os.setsid()
      if os.fork() != 0:
         sys.exit(0)

      time.sleep(2)


   def loadProbeSitesInfo(self):
      oldProbeSites = copy.copy(self.probeSites)
      del self.probeSites
      self.probeSites = {}

      sitePattern     = re.compile('(.*\[)(.*)(])')
      keyValuePattern = re.compile('( *)(\w*)( *= *)(.*[^\s$])( *)')
      commentPattern  = re.compile('\s*#.*')
      siteName        = ""

      fpInfo = open(self.probeSitesPath,'r')
      if fpInfo :
         eof = False
         while not eof:
            record = fpInfo.readline()
            if record != "":
               record = commentPattern.sub("",record)
               if   sitePattern.match(record):
                  siteName = sitePattern.match(record).group(2)
                  self.probeSites[siteName] = {'probeProgram':'./probegridsite.sh', \
                                               'nCores':-1, \
                                               'maximumTimeToProbeCompletion':-1, \
                                               'wallTime':10, \
                                               'inputs':[], \
                                               'preSubmitCommands':[], \
                                               'postSubmitCommands':[], \
                                               'state':'active' \
                                              }
                  if siteName in oldProbeSites:
                     del oldProbeSites[siteName]
               elif keyValuePattern.match(record):
                  key,value = keyValuePattern.match(record).group(2,4)
                  if key in self.probeSites[siteName]:
                     if   isinstance(self.probeSites[siteName][key],list):
                        self.probeSites[siteName][key] = [e.strip() for e in value.split(',')]
                     elif isinstance(self.probeSites[siteName][key],bool):
                        self.probeSites[siteName][key] = bool(value.lower() == 'true')
                     elif isinstance(self.probeSites[siteName][key],float):
                        self.probeSites[siteName][key] = float(value)
                     elif isinstance(self.probeSites[siteName][key],int):
                        self.probeSites[siteName][key] = int(value)
                     elif isinstance(self.probeSites[siteName][key],dict):
                        try:
                           sampleKey   = self.probeSites[siteName][key].keys()[0]
                           sampleValue = self.probeSites[siteName][key][sampleKey]
                        except:
                           sampleKey   = "key"
                           sampleValue = "value"
                        self.probeSites[siteName][key] = {}
                        for e in value.split(','):
                           dictKey,dictValue = e.split(':')
                           if isinstance(sampleKey,int):
                              dictKey = int(dictKey)
                           if   isinstance(sampleValue,int):
                              dictValue = int(dictValue)
                           elif isinstance(sampleValue,float):
                              dictValue = float(dictValue)
                           elif isinstance(sampleValue,bool):
                              dictValue = bool(dictValue.lower() == 'true')
                           self.probeSites[siteName][key][dictKey] = dictValue
                     else:
                        self.probeSites[siteName][key] = value
                  else:
                     self.logger.log(logging.WARNING,getLogMessage("Undefined key = value pair %s = %s for site %s" % \
                                                                                               (key,value,siteName)))
            else:
               eof = True
         fpInfo.close()

         for siteName in oldProbeSites:
            self.probeSitesRetired.append(siteName)
         del oldProbeSites


   def isProbeSiteActive(self,
                         probeSite):
      """
      Return True when probeSite is present in self.probeSites and its
      'state' setting equals 'active', otherwise False.
      """
      if probeSite not in self.probeSites:
         return(False)

      return(self.probeSites[probeSite]['state'] == 'active')


   def scanProbeHistory(self):
      if os.path.exists(self.completedDumpFilePath):
         shutil.copy2(self.completedDumpFilePath,self.completedDumpFilePath+".bck")
         fpDumpFile = open(self.completedDumpFilePath,'r')
         if fpDumpFile:
            loadCompleted = False
            while not loadCompleted:
               probe = Probe(-1,time.time(),"FILE")
               probe.read(fpDumpFile)
               if probe.id >= 0:
                  self.maxProbeId = max(self.maxProbeId,probe.id)
                  self.maxJobFinished = max(self.maxJobFinished,probe.jobFinished)
                  if probe.destination == "Probe_Server_Stopped":
                     self.maxProbeStopped = max(self.maxProbeStopped,probe.when)
               else:
                  loadCompleted = True
               del probe
            fpDumpFile.close()


   def executeProbeCommand(self,
                           command,
                           environment=None,
                           streamOutput=False):
      if isinstance(command,list):
         child = subprocess.Popen(command,bufsize=self.bufferSize,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  env=environment,
                                  close_fds=True)
      else:
         commandArgs = shlex.split(command)
         child = subprocess.Popen(commandArgs,bufsize=self.bufferSize,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE,
                                  env=environment,
                                  close_fds=True)
      self.commandPid = child.pid
      childout        = child.stdout
      childoutFd      = childout.fileno()
      childerr        = child.stderr
      childerrFd      = childerr.fileno()

      outEOF = False
      errEOF = False

      outData = []
      errData = []

      while True:
         toCheck = []
         if not outEOF:
            toCheck.append(childoutFd)
         if not errEOF:
            toCheck.append(childerrFd)

         try:
            ready = select.select(toCheck,[],[]) # wait for input
         except select.error as e:
            if e.args[0] == EINTR:
               ready = {}
               ready[0] = []
               if self.probeLaunchTerminating or self.probeLaunchTerminated:
                  outEOF = True
                  errEOF = True
            else:
               raise

         if os.getppid() == 1:
            os.kill(os.getpid(),signal.SIGTERM)

         if childoutFd in ready[0]:
            outChunk = os.read(childoutFd,self.bufferSize)
            if outChunk == '':
               outEOF = True
            outData.append(outChunk)
            if streamOutput:
               sys.stdout.write(outChunk)
               sys.stdout.flush()

         if childerrFd in ready[0]:
            errChunk = os.read(childerrFd,self.bufferSize)
            if errChunk == '':
               errEOF = True
            errData.append(errChunk)
            if streamOutput:
               sys.stderr.write(errChunk)
               sys.stderr.flush()

         if outEOF and errEOF:
            break

      pid,err = os.waitpid(self.commandPid,0)
      self.commandPid = 0
      if err != 0:
         if os.WIFSIGNALED(err):
            self.logger.log(logging.INFO,getLogMessage("%s failed w/ signal %d" % (command,os.WTERMSIG(err))))
         else:
            if os.WIFEXITED(err):
               err = os.WEXITSTATUS(err)
            self.logger.log(logging.INFO,getLogMessage("%s failed w/ exit code %d" % (command,err)))
         if not streamOutput:
            self.logger.log(logging.INFO,getLogMessage("%s" % ("".join(errData))))

      return(err,"".join(outData),"".join(errData))


   def verifySubmit(self):
      """
      Check that the configured submit command can be used.

      The path in configData['submitPath'] must exist and be executable, and
      running it with the arguments "--local id" must finish with exit
      status 0.  Returns True only when all of that holds; a missing or
      non-executable path is logged as an error.
      """
      submitPath = self.configData['submitPath']

      if not os.path.exists(submitPath):
         self.logger.log(logging.ERROR,getLogMessage("%s does not exist." % (submitPath)))
         return(False)

      if not os.access(submitPath,os.X_OK):
         self.logger.log(logging.ERROR,getLogMessage("%s is not executable." % (submitPath)))
         return(False)

#     test to see if submit session is viable
      exitStatus,stdOutput,stdError = self.executeProbeCommand([submitPath,'--local','id'])

      return(exitStatus == 0)


   def run(self):
      self.logger.log(logging.INFO,getLogMessage("**************************"))
      self.logger.log(logging.INFO,getLogMessage("* probe launcher started *"))
      self.logger.log(logging.INFO,getLogMessage("**************************"))

      fpDumpFile = open(self.completedDumpFilePath,'a')
      newProbeId = self.maxProbeId

      if self.maxJobFinished > self.maxProbeStopped:
         newProbeId += 1
         probe = Probe(newProbeId,self.maxJobFinished+1,"Probe_Server_Stopped")
         probe.dump(fpDumpFile)
         del probe

      newProbeId += 1
      probe = Probe(newProbeId,time.time(),"Probe_Server_Started")
      probe.dump(fpDumpFile)
      del probe

      inputDescriptors = []

      isFinished = self.toBeScheduledQueue.isEmpty() and self.inProgressQueue.isEmpty() and not self.reloadProbeSitesInfo
      while not isFinished:
         if self.reloadProbeSitesInfo:
            self.reloadingProbeSitesInfo = True
            self.configure()
            self.loadProbeSitesInfo()

   # remove sites that should no longer be probed from queue
            existingDestinations = self.toBeScheduledQueue.getDestinations()
            for destination in existingDestinations:
               if not self.isProbeSiteActive(destination):
                  probe = self.toBeScheduledQueue.getDestination(destination)
                  while probe:
                     probe = self.toBeScheduledQueue.getDestination(destination)

   # build set of sites where probe is in progress or scheduled for the future
            existingDestinations = self.toBeScheduledQueue.getDestinations()
            pendingDestinations = self.inProgressQueue.getDestinations()
            for destination in pendingDestinations:
               if not destination in existingDestinations:
                  existingDestinations.append(destination)

   # for sites that should be probed make sure that there is a probe in progress or scheduled for the future
            for probeSite in self.probeSites:
               if self.isProbeSiteActive(probeSite):
                  if not probeSite in existingDestinations:
                     newProbeId += 1
                     interval = random.randint(0,self.configData['timeBetweenProbes']*60)
                     self.logger.log(logging.INFO,getLogMessage("Initial %s probe set to launch in %d seconds" % \
                                                                                          (probeSite,interval)))
                     probe = Probe(newProbeId,time.time()+interval,probeSite)
                     self.toBeScheduledQueue.add(probe)

            self.reloadProbeSitesInfo = False
            self.reloadingProbeSitesInfo = False

         for fdSite in self.remoteProbeJobInfo:
            probeId,probeSite,childPid,launchTime = self.remoteProbeJobInfo[fdSite]
            if probeSite in self.probeSites:
               maximumTimeToProbeCompletion = self.probeSites[probeSite]['maximumTimeToProbeCompletion']
               if maximumTimeToProbeCompletion > 0:
                  maximumTimeToProbeCompletion = maximumTimeToProbeCompletion*60.
               else:
                  maximumTimeToProbeCompletion = self.configData['maximumTimeToProbeCompletion']*60
            else:
               maximumTimeToProbeCompletion = self.configData['maximumTimeToProbeCompletion']*60
            if launchTime+maximumTimeToProbeCompletion <= time.time():
               self.logger.log(logging.INFO,getLogMessage("Send TERM to child site %s process(%d)" % (probeSite,childPid)))
               os.kill(childPid,signal.SIGTERM)

         try:
            if self.toBeScheduledQueue.isEmpty():
               if not self.inProgressQueue.isEmpty():
                  readyInputFds = select.select(inputDescriptors,[],[])[0]
               else:
                  readyInputFds = []
            else:
               probe = self.toBeScheduledQueue.peek()
               maximumWaitTime = probe.when - time.time()
               if maximumWaitTime > 0.:
                  readyInputFds = select.select(inputDescriptors,[],[],maximumWaitTime)[0]
               else:
                  readyInputFds = []
         except select.error as e:
            if e.args[0] == EINTR:
               readyInputFds = []
            else:
               raise

         for readyInputFd in readyInputFds:
            probeId,probeSite,childPid,launchTime = self.remoteProbeJobInfo[readyInputFd]
            message = os.read(readyInputFd,1)

            finishedPid,exitStatus = os.waitpid(childPid,0)
            if exitStatus != 0:
               if os.WIFSIGNALED(exitStatus):
                  self.logger.log(logging.INFO,getLogMessage("Closed %s with signal %d" % (probeSite,os.WTERMSIG(exitStatus))))
               else:
                  if os.WIFEXITED(exitStatus):
                     exitStatus = os.WEXITSTATUS(exitStatus)
                  self.logger.log(logging.INFO,getLogMessage("Closed %s with exit code %d" % (probeSite,exitStatus)))
               status = "FAILED"
            else:
               self.logger.log(logging.INFO,getLogMessage("Closed %s with exit code %d" % (probeSite,exitStatus)))
               status = "PASSED"
            inputDescriptors.remove(readyInputFd)
            del self.remoteProbeJobInfo[readyInputFd]
            try:
               os.close(readyInputFd)
            except:
               self.logger.log(logging.INFO,getLogMessage("close(readyInputFd) failed"))

            probe = self.inProgressQueue.getId(probeId)
            probe.jobFinished = time.time()
            probe.exitStatus = exitStatus
            probe.dump(fpDumpFile)

            probeDumpFile = os.path.join(self.probeRoot,self.configData['probeDirectory'],probe.destination,
                                         "%09d" % (probe.id),"%09d.dump" % (probe.id))
            fpProbeDumpFile = open(probeDumpFile,'w')
            probe.dump(fpProbeDumpFile)
            fpProbeDumpFile.close()

            if exitStatus != 143:
               for remoteProbeMonitor in self.remoteProbeMonitors:
                  remoteProbeMonitor.postProbeSiteUpdate(probeSite,
                                                         int(probe.jobFinished),
                                                         status,
                                                         probe.jobFinished-probe.jobStarted)
            del probe

            if not self.launchMasterTerminating and not self.launchMasterTerminated:
               if self.isProbeSiteActive(probeSite):
                  newProbeId += 1
                  probe = Probe(newProbeId,time.time()+self.configData['timeBetweenProbes']*60,probeSite)
                  self.toBeScheduledQueue.add(probe)

         while not self.toBeScheduledQueue.isEmpty():
            probe = self.toBeScheduledQueue.peek()
            if probe.when <= time.time():
               probe = self.toBeScheduledQueue.pop()
               probe.jobStarted = time.time()
               if self.isProbeSiteActive(probe.destination):
                  self.inProgressQueue.add(probe)
                  inputs             = ','.join(self.probeSites[probe.destination]['inputs'])
                  preSubmitCommands  = ','.join(self.probeSites[probe.destination]['preSubmitCommands'])
                  postSubmitCommands = ','.join(self.probeSites[probe.destination]['postSubmitCommands'])

                  substitutions = {}
                  substitutions["GRIDPROBEROOT"]      = self.probeRoot
                  substitutions["PROBEDIRECTORY"]     = self.configData['probeDirectory']
                  substitutions["REMOTEVENUE"]        = probe.destination
                  substitutions["PROBEID"]            = "%09d" % (probe.id)
                  substitutions["PROBEPROGRAM"]       = self.probeSites[probe.destination]['probeProgram']
                  substitutions["NCORES"]             = self.probeSites[probe.destination]['nCores']
                  substitutions["WALLTIME"]           = self.probeSites[probe.destination]['wallTime']
                  substitutions["INPUTS"]             = inputs
                  substitutions["PRESUBMITCOMMANDS"]  = preSubmitCommands
                  substitutions["POSTSUBMITCOMMANDS"] = postSubmitCommands

                  probeSiteShellTemplate = ParameterTemplate(self.rawProbeSiteShellTemplate)

                  try:
                     probeSiteShellScript = probeSiteShellTemplate.substitute_recur(substitutions)
                  except KeyError as e:
                     probeSiteShellScript = ""
                     self.logger.log(logging.ERROR,getLogMessage("Pattern substitution failed for @@{%s}\n" % (e.args[0])))
                  except TypeError:
                     probeSiteShellScript = ""
                     self.logger.log(logging.ERROR,getLogMessage("Submission script substitution failed:\n%s\n" % \
                                                                               (self.rawProbeSiteShellTemplate)))

                  probeJobPath = os.path.join(self.probeRoot,self.configData['probeDirectory'],
                                              probe.destination,"%09d" % (probe.id))
                  if not os.path.isdir(probeJobPath):
                     os.makedirs(probeJobPath)

                  probeCommandShellPath = os.path.join(probeJobPath,self.configData['probeSiteShell'])
                  fpProbeShell = open(probeCommandShellPath,'w')
                  fpProbeShell.write(probeSiteShellScript)
                  fpProbeShell.close()
                  os.chmod(probeCommandShellPath,stat.S_IRWXU|stat.S_IRGRP|stat.S_IROTH)

                  parentReceiveFd,childSendFd = os.pipe()

                  pid = os.fork()
                  if pid:
# parent
                     try:
                        os.close(childSendFd)
                     except:
                        self.logger.log(logging.INFO,getLogMessage("close(childSendFd) failed"))
                     inputDescriptors.append(parentReceiveFd)
                     self.remoteProbeJobInfo[parentReceiveFd] = (probe.id,probe.destination,pid,time.time())
                  else:
# child
                     self.launchMaster = False
                     try:
                        os.close(parentReceiveFd)
                     except:
                        self.logger.log(logging.INFO,getLogMessage("close(parentReceiveFd) failed"))
                     try:
                        fpDumpFile.close()
                     except:
                        self.logger.log(logging.INFO,getLogMessage("close(fpDumpFile) failed"))
                     self.logger.log(logging.INFO,getLogMessage("Launching %s" % (probe.destination)))
                     os.environ['SUBMITPATH']     = self.configData['submitPath']
                     os.environ['PEGASUSVERSION'] = self.configData['pegasusVersion']
                     exitStatus,stdOutput,stdError = self.executeProbeCommand(probeCommandShellPath,
                                                                              environment=os.environ)
                     os.write(childSendFd,"0")
                     try:
                        os.close(childSendFd)
                     except:
                        self.logger.log(logging.INFO,getLogMessage("close(childSendFd) failed"))
                     self.logger.log(logging.INFO,getLogMessage("Closing %s with exit code %d" % (probe.destination,exitStatus)))
                     try:
                        os.remove(probeCommandShellPath)
                     except:
                        pass
                     sys.exit(exitStatus)
               else:
                  if probe.destination in self.probeSitesRetired:
                     self.logger.log(logging.INFO,getLogMessage("%s retired" % (probe.destination)))
                  else:
                     self.logger.log(logging.INFO,getLogMessage("%s not in probeSites" % (probe.destination)))
            else:
               break

         isFinished = self.toBeScheduledQueue.isEmpty() and self.inProgressQueue.isEmpty() and not self.reloadProbeSitesInfo

      newProbeId += 1
      probe = Probe(newProbeId,time.time(),"Probe_Server_Stopped")
      probe.dump(fpDumpFile)
      del probe
      self.inProgressQueue.dump(self.inProgressDumpFilePath)
      self.toBeScheduledQueue.dump(self.toBeScheduledDumpFilePath)

      self.logger.log(logging.INFO,getLogMessage("**************************"))
      self.logger.log(logging.INFO,getLogMessage("* probe launcher stopped *"))
      self.logger.log(logging.INFO,getLogMessage("**************************"))


