#
# Copyright (c) 2004-2011 Purdue University All rights reserved.
# 
# Developed by: HUBzero Technology Group, Purdue University
#               http://hubzero.org
# 
# HUBzero is free software: you can redistribute it and/or modify it under the terms of the
# GNU Lesser General Public License as published by the Free Software Foundation, either
# version 3 of the License, or (at your option) any later version.
# 
# HUBzero is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
# without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU Lesser General Public License for more details.  You should have received a
# copy of the GNU Lesser General Public License along with HUBzero.
# If not, see <http://www.gnu.org/licenses/>.
# 
# GNU LESSER GENERAL PUBLIC LICENSE
# Version 3, 29 June 2007
# Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
#
import sys
import time
import traceback
import csv
import os

from hubzero.submit.LogMessage  import logID as log
from hubzero.submit.MessageCore import MessageCore
from hubzero.submit.JobOutput   import *

class RemoteJobMonitor(MessageCore):
   def __init__(self,
                host,
                port,
                repeatDelay=5,
                fixedBufferSize=64):
      """Create a client for the remote job monitor.

      host and port are handed to MessageCore as listenerHost and listenerPort,
      repeatDelay is forwarded unchanged.  fixedBufferSize is the value this
      class passes as both size arguments of every request it issues.
      """
      MessageCore.__init__(self,
                           listenerHost=host,
                           listenerPort=port,
                           repeatDelay=repeatDelay)

      # filled in by postNewWorkflowSubmission(), written to the workflow status sheet
      self.startDate       = None
      self.enteredCommand  = None

      self.fixedBufferSize = fixedBufferSize


   def postNewJobSubmission(self,
                            siteMonitorDesignator,
                            remoteJobId,
                            hubUserId,
                            localJobId,
                            destination):
      """Tell the monitor about a newly submitted remote job.

      Sends an 'S:' message made of the five arguments separated by single
      blanks and logs the number of tries together with the reply.
      Nothing is returned.
      """
      submissionFields = (siteMonitorDesignator,remoteJobId,hubUserId,localJobId,destination)
      queryMessage = 'S:' + " ".join(submissionFields)

      bufferSize = self.fixedBufferSize
      nTry,response = self.requestMessageResponse(queryMessage,bufferSize,bufferSize)

      confirmation = "confirmation: S(%d):%s" % (nTry,response)
      log(confirmation)


   def postNewWorkflowSubmission(self,
                                 siteMonitorDesignator,
                                 remoteJobId,
                                 hubUserId,
                                 enteredCommand,
                                 localJobId,
                                 nInstances,
                                 destination):
      """Tell the monitor about a newly submitted workflow.

      The entered command and the current local time are remembered on the
      instance (enteredCommand, startDate).  The 'S:' message has the same
      layout as for a plain job except that the local job id field is replaced
      by the workflow id 'WF;<localJobId>;<nInstances>'.  The number of tries
      and the reply are logged; nothing is returned.
      """
      self.startDate      = time.strftime("%a %b %d %X %Z %Y")
      self.enteredCommand = enteredCommand

      workflowId = 'WF' + ';' + localJobId + ';' + str(nInstances)
      submissionFields = (siteMonitorDesignator,remoteJobId,hubUserId,workflowId,destination)
      queryMessage = 'S:' + " ".join(submissionFields)

      bufferSize = self.fixedBufferSize
      nTry,response = self.requestMessageResponse(queryMessage,bufferSize,bufferSize)

      confirmation = "confirmation: S(%d):%s" % (nTry,response)
      log(confirmation)


   def queryRemoteJobStatus(self,
                            siteMonitorDesignator,
                            remoteJobId):
      queryMessage = 'Q:' + siteMonitorDesignator + " " + remoteJobId
      nTry,response = self.requestMessageResponse(queryMessage,
                                                  self.fixedBufferSize,
                                                  self.fixedBufferSize)

      if nTry > 1:
         log("confirmation: Q(%d):%s" % (nTry,response))

      jobStatus,jobStage = response.strip().split()
      if ';' in jobStage:
         jobStage,jobSite = jobStage.split(';')
      else:
         jobSite = '?'
      if jobStage == '?':
         jobStage = "Job"

      return(jobStatus,jobStage,jobSite)


   def queryWorkflowStatus(self,
                           siteMonitorDesignator,
                           remoteJobId,
                           nInstances):
      queryMessage = 'W:' + siteMonitorDesignator + " " + remoteJobId + " " + str(nInstances)
      nTry,response = self.requestMessageVariableResponse(queryMessage,
                                                          self.fixedBufferSize,
                                                          self.fixedBufferSize)
      response = response.split(':')

      if nTry > 1:
         log("confirmation: W(%d):%s" % (nTry,response[0]))

      dagStatus,dagStage = response[0].strip().split()
      if ';' in dagStage:
         dagStage,dagSite = dagStage.split(';')
      else:
         dagSite = '?'
      if dagStage == '?':
         dagStage = "DAG"
      del response[0]

      wfInstances = {}
      for wfInstance in response:
         instance,jobStatus,jobStage = wfInstance.strip().split()
         wfInstances[int(instance)] = {}
         wfInstances[int(instance)]['jobStatus'] = jobStatus
         wfInstances[int(instance)]['jobStage']  = jobStage

      return(dagStatus,dagStage,dagSite,wfInstances)


   def terminateRemoteJob(self,
                          siteMonitorDesignator,
                          remoteJobId):
      """Send a 'T:' message for the given remote job to the monitor.

      The number of tries and the reply are logged; nothing is returned.
      """
      queryMessage = 'T:' + " ".join((siteMonitorDesignator,remoteJobId))

      bufferSize = self.fixedBufferSize
      nTry,response = self.requestMessageResponse(queryMessage,bufferSize,bufferSize)

      confirmation = "confirmation: T(%d):%s" % (nTry,response)
      log(confirmation)


   def queryRemoteActiveJobStatus(self,
                                  siteMonitorDesignator,
                                  remoteJobId):
      queryMessage = 'R:' + siteMonitorDesignator + " " + remoteJobId
      nTry,report,lastReportTime = self.requestMessageTimestampResponse(queryMessage,
                                                                        self.fixedBufferSize,
                                                                        self.fixedBufferSize)

      if nTry > 1:
         log("confirmation: R(%d):%s" % (nTry,lastReportTime))

      return(float(lastReportTime),report)


   def queryUserActiveJobStatus(self,
                                hubUserId):
      queryMessage = 'U:' + hubUserId
      nTry,report,reportTime = self.requestMessageTimestampResponse(queryMessage,
                                                                    self.fixedBufferSize,
                                                                    self.fixedBufferSize)

      if nTry > 1:
         log("confirmation: R(%d):%s" % (nTry,reportTime))

      reportedJobs = {}
      if len(report) > 0:
         jobs = report.split(':')
         for job in jobs:
            localJobId,jobQueue,site,jobStatus,jobStage = job.split()
            if ';' in jobStage:
               jobStage,jobSite = jobStage.split(';')
            else:
               jobSite = site
            jobStatusMessage = self.__getJobStatusMessage(jobStatus)
            if jobStage == '?':
               jobStage = 'Job'
            reportedJobs[localJobId] = (jobQueue,jobSite,jobStatusMessage,jobStage)

      return(float(reportTime),reportedJobs)


   def __getJobStatusMessage(self,
                             jobStatus):
      jobStatusMessages = {}
      jobStatusMessages['N']  = 'Submitted'
      jobStatusMessages['WF'] = 'Pending Submission'
      jobStatusMessages['I']  = 'Idle'
      jobStatusMessages['Q']  = 'Queued'
      jobStatusMessages['H']  = 'Held'
      jobStatusMessages['R']  = 'Running'
      jobStatusMessages['C']  = 'Complete'
      jobStatusMessages['SE'] = 'Submission Error'
      jobStatusMessages['X']  = 'Marked For Deletion'
      jobStatusMessages['E']  = 'Exiting'
      jobStatusMessages['T']  = 'Moving'
      jobStatusMessages['W']  = 'Waiting'
      jobStatusMessages['S']  = 'Suspended'
      jobStatusMessages['D']  = 'Done'
      jobStatusMessages['CA'] = 'Cancelled'
      jobStatusMessages['CD'] = 'Completed'
      jobStatusMessages['CF'] = 'Configuring'
      jobStatusMessages['CG'] = 'Completing'
      jobStatusMessages['F']  = 'Failed'
      jobStatusMessages['NF'] = 'Node_Fail'
      jobStatusMessages['PD'] = 'Pending'
      jobStatusMessages['TO'] = 'Timeout'
      jobStatusMessages['CK'] = 'Checkpointing'
      jobStatusMessages['CP'] = 'Complete Pending'
      jobStatusMessages['DF'] = 'Deferred'
      jobStatusMessages['NQ'] = 'Not Queued'
      jobStatusMessages['NR'] = 'Not Run'
      jobStatusMessages['P']  = 'Pending'
      jobStatusMessages['EP'] = 'Preempt Pending'
      jobStatusMessages['XP'] = 'Reject Pending'
      jobStatusMessages['RM'] = 'Removed'
      jobStatusMessages['RP'] = 'Remove Pending'
      jobStatusMessages['MP'] = 'Resume Pending'
      jobStatusMessages['ST'] = 'Starting'
      jobStatusMessages['TX'] = 'Terminated'
      jobStatusMessages['V']  = 'Vacated'
      jobStatusMessages['VP'] = 'Vacate Pending'
      jobStatusMessages['HS'] = 'User & System Hold'
      jobStatusMessages['PT'] = 'Preempted'
      jobStatusMessages['RJ'] = 'Rejected'
      jobStatusMessages['SH'] = 'System Hold'

      try:
         jobStatusMessage = jobStatusMessages[jobStatus]
      except:
         log(traceback.format_exc())
         jobStatusMessage = 'Unknown Status'

      return(jobStatusMessage)


   def __getJobStatusState(self,
                           jobStatus):
# 'waiting'
# 'aborted'
# 'setting up'
# 'failed'
# 'executing'
# 'finished'

      jobStatusStates = {}
      jobStatusStates['N']  = 'waiting'
      jobStatusStates['WF'] = 'waiting'
      jobStatusStates['I']  = 'waiting'
      jobStatusStates['Q']  = 'waiting'
      jobStatusStates['H']  = 'waiting'
      jobStatusStates['R']  = 'executing'
      jobStatusStates['C']  = 'executing'
      jobStatusStates['SE'] = 'failed'
      jobStatusStates['X']  = 'aborted'
      jobStatusStates['E']  = 'executing'
      jobStatusStates['T']  = 'waiting'
      jobStatusStates['W']  = 'waiting'
      jobStatusStates['S']  = 'waiting'
      jobStatusStates['D']  = 'finished'
      jobStatusStates['CA'] = 'waiting'
      jobStatusStates['CD'] = 'executing'
      jobStatusStates['CF'] = 'waiting'
      jobStatusStates['CG'] = 'executing'
      jobStatusStates['F']  = 'waiting'
      jobStatusStates['NF'] = 'waiting'
      jobStatusStates['PD'] = 'waiting'
      jobStatusStates['TO'] = 'waiting'
      jobStatusStates['CK'] = 'executing'
      jobStatusStates['CP'] = 'executing'
      jobStatusStates['DF'] = 'waiting'
      jobStatusStates['NQ'] = 'waiting'
      jobStatusStates['NR'] = 'waiting'
      jobStatusStates['P']  = 'waiting'
      jobStatusStates['EP'] = 'waiting'
      jobStatusStates['XP'] = 'waiting'
      jobStatusStates['RM'] = 'waiting'
      jobStatusStates['RP'] = 'waiting'
      jobStatusStates['MP'] = 'waiting'
      jobStatusStates['ST'] = 'waiting'
      jobStatusStates['TX'] = 'waiting'
      jobStatusStates['V']  = 'waiting'
      jobStatusStates['VP'] = 'waiting'
      jobStatusStates['HS'] = 'waiting'
      jobStatusStates['PT'] = 'waiting'
      jobStatusStates['RJ'] = 'waiting'
      jobStatusStates['SH'] = 'waiting'

      try:
         jobStatusState = jobStatusStates[jobStatus]
      except:
         log(traceback.format_exc())
         jobStatusState = 'Unknown State'

      jobStatusReportOrders = {}
      jobStatusReportOrders['waiting']    = 5
      jobStatusReportOrders['aborted']    = 1
      jobStatusReportOrders['setting up'] = 6
      jobStatusReportOrders['failed']     = 3
      jobStatusReportOrders['executing']  = 4
      jobStatusReportOrders['finished']   = 2

      try:
         jobStatusReportOrder = jobStatusReportOrders[jobStatusState]
      except:
         jobStatusReportOrder = 99

      return(jobStatusState,jobStatusReportOrder)


   def waitForBatchJob(self,
                       siteMonitorDesignator,
                       remoteJobId,
                       knownSite=""):
      """Block until the monitor reports status 'D' for one remote job.

      Nothing is done when remoteJobId is the empty string.  Otherwise the job
      is polled with queryRemoteJobStatus().  When the monitor gives no site
      ('' or '?') and knownSite is not empty, knownSite is used instead.

      The first result and every change of status, stage or site is logged
      and written to stdout.  While nothing changes a reminder line is written
      to stdout (not logged) once at least 320 seconds of sleep have
      accumulated since the last line.  The pause between polls starts at 5
      seconds, doubles after every 5 polls up to 320 seconds and falls back to
      5 seconds whenever a change is seen.  Nothing is returned.
      """
      if remoteJobId == "":
         return

      minimumDelay       = 5       #  5 10 20 40 80 160 320
      maximumDelay       = 320
      updateFrequency    = 5
      maximumReportDelay = 320

      def siteIsUnknown(site):
         return(site == "" or site == '?')

      def pollStatus():
         # one query, with the caller supplied site filling in for a missing one
         status,stage,site = self.queryRemoteJobStatus(siteMonitorDesignator,remoteJobId)
         if siteIsUnknown(site) and knownSite != "":
            site = knownSite
         return(status,stage,site)

      def announce(status,stage,site,writeLog):
         # one status line on stdout, preceded by a log entry when requested
         statusMessage = self.__getJobStatusMessage(status)
         if siteIsUnknown(site):
            if writeLog:
               log("status:%s %s" % (stage,status))
            sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,stage,statusMessage,time.ctime()))
         else:
            if writeLog:
               log("status:%s %s %s" % (stage,status,site))
            sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,stage,statusMessage,site,time.ctime()))
         sys.stdout.flush()

      elapsed      = 0              # seconds spent sleeping so far
      pause        = minimumDelay   # length of the next sleep
      pollsAtPause = 0              # polls since the pause was last changed
      lastReportAt = elapsed        # value of elapsed when the last line was written

      latest = pollStatus()
      announce(latest[0],latest[1],latest[2],True)
      reported = latest

      while latest[0] != 'D':
         pollsAtPause += 1
         time.sleep(pause)
         elapsed += pause
         if pollsAtPause == updateFrequency:
            pollsAtPause = 0
            pause = min(2*pause,maximumDelay)

         latest = pollStatus()
         if latest != reported:
            announce(latest[0],latest[1],latest[2],True)
            reported     = latest
            lastReportAt = elapsed
            pause        = minimumDelay
            pollsAtPause = 0
         elif elapsed >= lastReportAt + maximumReportDelay:
            announce(latest[0],latest[1],latest[2],False)
            lastReportAt = elapsed


   def waitForBatchJobs(self,
                        waitForJobsInfo,
                        abortGlobal):
      """Poll a set of jobs until at least one of them is done.

      waitForJobsInfo is a dictionary keyed by instance.  Every entry is a
      dictionary from which 'recentJobStatus' and 'isBatchJob' are read and,
      for batch jobs, 'siteMonitorDesignator', 'remoteJobId' and 'knownSite'.
      'recentJobStatus' and 'recentJobSite' are updated in place.
      abortGlobal is a dictionary; polling stops once
      abortGlobal['abortAttempted'] is true.

      Entries whose 'recentJobStatus' is already 'D' are ignored.  Entries that
      are not batch jobs are marked 'D' right away without asking the monitor.
      Batch jobs are polled with queryRemoteJobStatus(); the first result and
      every change of status, stage or site is logged and written to stdout.

      Returns the list of instance keys that reached status 'D' during this
      call.  The call returns as soon as that list is not empty, when no
      incomplete job is left or when an abort was attempted.
      """
      completeRemoteJobIndexes = []

      # polling back off: start at minimumDelay seconds, double after every
      # updateFrequency polls, never sleep longer than maximumDelay
      minimumDelay = 5       #  5 10 20 40 80 160 320
      maximumDelay = 320
      updateFrequency = 5
      # accumulated sleep time without a status line before a reminder is written
      maximumReportDelay = 320

      delayTime = 0
      sleepTime = minimumDelay
      nDelays = 0
      timeLastReported = delayTime

      # last reported status, stage and site per instance, used to detect changes
      previousJobStatuses = {}
      previousJobStages   = {}
      previousJobSites    = {}

      # initial pass: query every job that is not yet done and report what was found
      incompleteJobs = 0
      for instance in waitForJobsInfo:
         if waitForJobsInfo[instance]['recentJobStatus'] != 'D':
            if waitForJobsInfo[instance]['isBatchJob']:
               siteMonitorDesignator = waitForJobsInfo[instance]['siteMonitorDesignator']
               remoteJobId           = waitForJobsInfo[instance]['remoteJobId']
               knownSite             = waitForJobsInfo[instance]['knownSite']
               currentJobStatus,currentJobStage,currentJobSite = self.queryRemoteJobStatus(siteMonitorDesignator,remoteJobId)
               # fall back to the site supplied by the caller when the monitor names none
               if currentJobSite == "" or currentJobSite == '?':
                  if knownSite != "":
                     currentJobSite = knownSite
               if currentJobSite != "" and currentJobSite != '?':
                  waitForJobsInfo[instance]['recentJobSite'] = currentJobSite
               jobStatusMessage = self.__getJobStatusMessage(currentJobStatus)
               if currentJobSite == "" or currentJobSite == '?':
                  log("status:%s %s" % (currentJobStage,currentJobStatus))
                  sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                    time.ctime()))
               else:
                  log("status:%s %s %s" % (currentJobStage,currentJobStatus,currentJobSite))
                  sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                          currentJobSite,time.ctime()))
               sys.stdout.flush()
            else:
               # not a batch job: recorded as done without contacting the monitor
               currentJobStatus = 'D'
               currentJobStage  = 'Job'
               currentJobSite   = ''

            waitForJobsInfo[instance]['recentJobStatus'] = currentJobStatus
            if currentJobStatus == 'D':
               completeRemoteJobIndexes.append(instance)
            else:
               incompleteJobs += 1

            previousJobStatuses[instance] = currentJobStatus
            previousJobStages[instance]   = currentJobStage
            previousJobSites[instance]    = currentJobSite

      # keep polling until one job completes, none is left or an abort was attempted
      while (len(completeRemoteJobIndexes) == 0) and (incompleteJobs > 0) and not abortGlobal['abortAttempted']:
         nDelays += 1
         time.sleep(sleepTime)
         delayTime += sleepTime
         if nDelays == updateFrequency:
            nDelays = 0
            sleepTime *= 2
            if sleepTime > maximumDelay:
               sleepTime = maximumDelay

         for instance in waitForJobsInfo:
            if waitForJobsInfo[instance]['recentJobStatus'] != 'D':
               if waitForJobsInfo[instance]['isBatchJob']:
                  siteMonitorDesignator = waitForJobsInfo[instance]['siteMonitorDesignator']
                  remoteJobId           = waitForJobsInfo[instance]['remoteJobId']
                  knownSite             = waitForJobsInfo[instance]['knownSite']
                  previousJobStatus = previousJobStatuses[instance]
                  previousJobStage  = previousJobStages[instance]
                  previousJobSite   = previousJobSites[instance]
                  currentJobStatus,currentJobStage,currentJobSite = self.queryRemoteJobStatus(siteMonitorDesignator,remoteJobId)
                  if currentJobSite == "" or currentJobSite == '?':
                     if knownSite != "":
                        currentJobSite = knownSite
                  if currentJobSite != "" and currentJobSite != '?':
                     waitForJobsInfo[instance]['recentJobSite'] = currentJobSite
                  if currentJobStatus != previousJobStatus or \
                     currentJobStage != previousJobStage or \
                     currentJobSite != previousJobSite:
                     # something changed: log it, report it and restart the back off
                     jobStatusMessage = self.__getJobStatusMessage(currentJobStatus)
                     if currentJobSite == "" or currentJobSite == '?':
                        log("status:%s %s" % (currentJobStage,currentJobStatus))
                        sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                          time.ctime()))
                     else:
                        log("status:%s %s %s" % (currentJobStage,currentJobStatus,currentJobSite))
                        sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                                currentJobSite,time.ctime()))
                     sys.stdout.flush()
                     waitForJobsInfo[instance]['recentJobStatus'] = currentJobStatus
                     if currentJobStatus == 'D':
                        completeRemoteJobIndexes.append(instance)
                        incompleteJobs -= 1
                     previousJobStatuses[instance] = currentJobStatus
                     previousJobStages[instance]   = currentJobStage
                     previousJobSites[instance]    = currentJobSite
                     timeLastReported = delayTime
                     sleepTime = minimumDelay
                     nDelays = 0
                  else:
                     # nothing changed: write a reminder to stdout (not to the log) when due
                     # NOTE(review): timeLastReported is shared by all instances, so within
                     #               one pass only the first unchanged job gets a reminder
                     #               line - confirm this is intended
                     if delayTime >= (timeLastReported + maximumReportDelay):
                        jobStatusMessage = self.__getJobStatusMessage(currentJobStatus)
                        if currentJobSite == "" or currentJobSite == '?':
                           sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                             time.ctime()))
                        else:
                           sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                                   currentJobSite,time.ctime()))
                        sys.stdout.flush()
                        timeLastReported = delayTime

      log("waitForBatchJobs: nCompleteRemoteJobIndexes = %d, nIncompleteJobs = %d, abortGlobal = %s" % \
                    (len(completeRemoteJobIndexes),incompleteJobs,abortGlobal['abortAttempted']))

      del previousJobStatuses
      del previousJobStages
      del previousJobSites

      return(completeRemoteJobIndexes)


   def __updateWorkflowStatusSheet(self,
                                   parameterCombinationsPath,
                                   wfInstances):
      parameterCombinationsDir  = os.path.dirname(parameterCombinationsPath)
      parameterCombinationsBase = os.path.basename(parameterCombinationsPath)
      if '.' in parameterCombinationsBase:
         parameterCombinationsBase = parameterCombinationsBase.split('.')[0]
      tmpParameterCombinationsFile = parameterCombinationsBase + '.tmp'
      tmpParameterCombinationsPath = os.path.join(parameterCombinationsDir,tmpParameterCombinationsFile)
      copyTmpFile = False

      fpCSVIn = open(parameterCombinationsPath,'rb')
      if fpCSVIn:
         csvReader = csv.reader(fpCSVIn)
         fpCSVOut = open(tmpParameterCombinationsPath,'wb')
         if fpCSVOut:
            csvWriter = csv.writer(fpCSVOut)
            csvWriter.writerow(('# command: ' + self.enteredCommand,))
            csvWriter.writerow(('# started: ' + self.startDate,))
            nCompleted = 0
            for instance in wfInstances:
               jobStatusState,jobStatusReportOrder = self.__getJobStatusState(wfInstances[instance]['jobStatus'])
               if jobStatusState in ['finished','failed','aborted']:
                  nCompleted += 1
            csvWriter.writerow(('# completed: %d/%d jobs' % (nCompleted,len(wfInstances)),))
            parameterNames = []
            while len(parameterNames) <= 1:
               parameterNames = csvReader.next()
            csvWriter.writerow(parameterNames)
#           for parameterCombination in csvReader:
#              instance = int(parameterCombination[0])
#              if instance in wfInstances:
#                 jobStatusState,jobStatusReportOrder = self.__getJobStatusState(wfInstances[instance]['jobStatus'])
#                 parameterCombination[1] = jobStatusState
#              csvWriter.writerow(parameterCombination)
            parameterCombinations = {}
            for parameterCombination in csvReader:
               instance = int(parameterCombination[0])
               if instance in wfInstances:
                  jobStatusState,jobStatusReportOrder = self.__getJobStatusState(wfInstances[instance]['jobStatus'])
                  parameterCombination[1] = jobStatusState
                  if not jobStatusReportOrder in parameterCombinations:
                     parameterCombinations[jobStatusReportOrder] = []
                  parameterCombinations[jobStatusReportOrder].append(parameterCombination)
            jobStatusReportOrders = parameterCombinations.keys()
            jobStatusReportOrders.sort()
            for jobStatusReportOrder in jobStatusReportOrders:
               for parameterCombination in parameterCombinations[jobStatusReportOrder]:
                  csvWriter.writerow(parameterCombination)
            fpCSVOut.close()
            copyTmpFile = True
         fpCSVIn.close()

      if copyTmpFile:
         os.rename(tmpParameterCombinationsPath,parameterCombinationsPath)


   def waitForWorkflowJobs(self,
                           waitForJobsInfo,
                           nInstances,
                           parameterCombinationsPath,
                           isClientTTY,
                           abortGlobal):
      completeRemoteJobIndexes = []

      minimumDelay = 5       #  5 10 20 40 80 160 320
      maximumDelay = 320
      updateFrequency = 5
      maximumReportDelay = 320

      delayTime = 0
      sleepTime = minimumDelay
      nDelays = 0
      timeLastReported = delayTime

      previousJobStatuses = {}
      previousJobStages   = {}
      previousJobSites    = {}

      executeInstance = nInstances+1
      incompleteJobs = 0
      if waitForJobsInfo[executeInstance]['recentJobStatus'] != 'D':
         if waitForJobsInfo[executeInstance]['isBatchJob']:
            siteMonitorDesignator = waitForJobsInfo[executeInstance]['siteMonitorDesignator']
            remoteJobId           = waitForJobsInfo[executeInstance]['remoteJobId']
            knownSite             = waitForJobsInfo[executeInstance]['knownSite']
            currentJobStatus,currentJobStage,currentJobSite,wfInstances = self.queryWorkflowStatus(siteMonitorDesignator,
                                                                                                   remoteJobId,nInstances)
            if currentJobSite == "" or currentJobSite == '?':
               if knownSite != "":
                  currentJobSite = knownSite
            if currentJobSite != "" and currentJobSite != '?':
               waitForJobsInfo[executeInstance]['recentJobSite'] = currentJobSite
            jobStatusMessage = self.__getJobStatusMessage(currentJobStatus)
            if currentJobSite == "" or currentJobSite == '?':
               log("status:%s %s" % (currentJobStage,currentJobStatus))
            else:
               log("status:%s %s %s" % (currentJobStage,currentJobStatus,currentJobSite))
            if not isClientTTY:
               if currentJobSite == "" or currentJobSite == '?':
                  sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                    time.ctime()))
               else:
                  sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                          currentJobSite,time.ctime()))
               sys.stdout.flush()
         else:
            currentJobStatus = 'D'
            currentJobStage  = 'Job'
            currentJobSite   = ''
            wfInstances      = {}

         instanceChangedStatus = False
         for wfInstance in wfInstances:
            if wfInstances[wfInstance]['jobStatus'] != waitForJobsInfo[wfInstance]['recentJobStatus']:
               waitForJobsInfo[wfInstance]['recentJobStatus'] = wfInstances[wfInstance]['jobStatus']
               instanceChangedStatus = True
            if ';' in wfInstances[wfInstance]['jobStage']:
               wfCurrentJobSite = wfInstances[wfInstance]['jobStage'].split(';')[1]
               if wfCurrentJobSite != "" and wfCurrentJobSite != '?':
                  waitForJobsInfo[wfInstance]['recentJobSite'] = wfCurrentJobSite
         self.__updateWorkflowStatusSheet(parameterCombinationsPath,wfInstances)
         del wfInstances

         waitForJobsInfo[executeInstance]['recentJobStatus'] = currentJobStatus
         if currentJobStatus == 'D':
            completeRemoteJobIndexes.append(executeInstance)
         else:
            incompleteJobs += 1

         previousJobStatuses[executeInstance] = currentJobStatus
         previousJobStages[executeInstance]   = currentJobStage
         previousJobSites[executeInstance]    = currentJobSite

      while (len(completeRemoteJobIndexes) == 0) and (incompleteJobs > 0) and not abortGlobal['abortAttempted']:
         nDelays += 1
         time.sleep(sleepTime)
         delayTime += sleepTime
         if nDelays == updateFrequency:
            nDelays = 0
            sleepTime *= 2
            if sleepTime > maximumDelay:
               sleepTime = maximumDelay

         if waitForJobsInfo[executeInstance]['recentJobStatus'] != 'D':
            if waitForJobsInfo[executeInstance]['isBatchJob']:
               siteMonitorDesignator = waitForJobsInfo[executeInstance]['siteMonitorDesignator']
               remoteJobId           = waitForJobsInfo[executeInstance]['remoteJobId']
               knownSite             = waitForJobsInfo[executeInstance]['knownSite']
               previousJobStatus = previousJobStatuses[executeInstance]
               previousJobStage  = previousJobStages[executeInstance]
               previousJobSite   = previousJobSites[executeInstance]
               currentJobStatus,currentJobStage,currentJobSite,wfInstances = self.queryWorkflowStatus(siteMonitorDesignator,
                                                                                                      remoteJobId,nInstances)
               instanceChangedStatus = False
               for wfInstance in wfInstances:
                  if wfInstances[wfInstance]['jobStatus'] != waitForJobsInfo[wfInstance]['recentJobStatus']:
                     waitForJobsInfo[wfInstance]['recentJobStatus'] = wfInstances[wfInstance]['jobStatus']
                     instanceChangedStatus = True
                  if ';' in wfInstances[wfInstance]['jobStage']:
                     wfCurrentJobSite = wfInstances[wfInstance]['jobStage'].split(';')[1]
                     if wfCurrentJobSite != "" and wfCurrentJobSite != '?':
                        waitForJobsInfo[wfInstance]['recentJobSite'] = wfCurrentJobSite
               if instanceChangedStatus:
                  self.__updateWorkflowStatusSheet(parameterCombinationsPath,wfInstances)
                  getInProgressPegasusJobStdFiles(waitForJobsInfo[executeInstance]['instanceDirectory'])
               del wfInstances

               if currentJobSite == "" or currentJobSite == '?':
                  if knownSite != "":
                     currentJobSite = knownSite
               if currentJobSite != "" and currentJobSite != '?':
                  waitForJobsInfo[executeInstance]['recentJobSite'] = currentJobSite
               if currentJobStatus != previousJobStatus or \
                  currentJobStage != previousJobStage or \
                  currentJobSite != previousJobSite:
                  jobStatusMessage = self.__getJobStatusMessage(currentJobStatus)
                  if currentJobSite == "" or currentJobSite == '?':
                     log("status:%s %s" % (currentJobStage,currentJobStatus))
                  else:
                     log("status:%s %s %s" % (currentJobStage,currentJobStatus,currentJobSite))
                  if not isClientTTY:
                     if currentJobSite == "" or currentJobSite == '?':
                        sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                          time.ctime()))
                     else:
                        sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                                currentJobSite,time.ctime()))
                     sys.stdout.flush()
                  waitForJobsInfo[executeInstance]['recentJobStatus'] = currentJobStatus
                  if currentJobStatus == 'D':
                     completeRemoteJobIndexes.append(executeInstance)
                     incompleteJobs -= 1
                  previousJobStatuses[executeInstance] = currentJobStatus
                  previousJobStages[executeInstance]   = currentJobStage
                  previousJobSites[executeInstance]    = currentJobSite
                  timeLastReported = delayTime
                  sleepTime = minimumDelay
                  nDelays = 0
               else:
                  if delayTime >= (timeLastReported + maximumReportDelay):
                     jobStatusMessage = self.__getJobStatusMessage(currentJobStatus)
                     if not isClientTTY:
                        if currentJobSite == "" or currentJobSite == '?':
                           sys.stdout.write("(%s) %s %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                             time.ctime()))
                        else:
                           sys.stdout.write("(%s) %s %s at %s %s\n" % (remoteJobId,currentJobStage,jobStatusMessage, \
                                                                                                   currentJobSite,time.ctime()))
                        sys.stdout.flush()
                     timeLastReported = delayTime

      log("waitForWorkflowJobs: nCompleteRemoteJobIndexes = %d, nIncompleteJobs = %d, abortGlobal = %s" % \
                    (len(completeRemoteJobIndexes),incompleteJobs,abortGlobal['abortAttempted']))

      del previousJobStatuses
      del previousJobStages
      del previousJobSites

      return(completeRemoteJobIndexes)


   def waitForKilledBatchJobs(self,
                              waitForJobsInfo):
      """Poll every job whose recentJobStatus is 'K' until it reports status 'D'.

      waitForJobsInfo maps an instance key to a per-job dictionary.  The keys
      read here are 'recentJobStatus' and 'isBatchJob' and, for batch jobs,
      'siteMonitorDesignator', 'remoteJobId' and 'knownSite'.  The dictionary
      is updated in place:
         'recentJobStatus' becomes 'KD' once a 'K' job reports 'D'
         'recentJobSite'   is set whenever a site other than "" or '?' is known
      Entries whose recentJobStatus is not 'K' are left untouched.

      A 'K' job that is not a batch job is never queried; it is treated as
      having reported 'D' straight away.  (Presumably 'K' means killed and
      'D' means done - confirm against the job monitor protocol.)

      Every batch job is queried and reported once up front.  After that the
      method sleeps between polling passes: 5 seconds to begin with, doubled
      after 5 consecutive sleeps, never more than 30 seconds, and back to 5
      seconds as soon as any job changes status, stage or site.  A change is
      logged and written to stdout.  When 30 seconds of accumulated sleep time
      pass without any output, the next unchanged job polled has its status
      line written again (stdout only); the report time is refreshed
      immediately, so at most one unchanged job is repeated per pass.

      There is no timeout: the method returns (None) only when no 'K' job is
      left outstanding.
      """
      minimumDelay       = 5    # seconds: polling interval at start and after any change
      maximumDelay       = 30   # seconds: upper bound on the polling interval
      updateFrequency    = 5    # number of sleeps at one interval before it is doubled
      maximumReportDelay = 30   # seconds of silence before a status line is repeated

      def siteIsUnknown(jobSite):
         # "" and '?' are the two values treated as "no site reported"
         return(jobSite == "" or jobSite == '?')

      def pollJob(jobInfo):
         # query one batch job, fall back to its known site, record a usable site
         siteMonitorDesignator = jobInfo['siteMonitorDesignator']
         remoteJobId           = jobInfo['remoteJobId']
         knownSite             = jobInfo['knownSite']
         jobStatus,jobStage,jobSite = self.queryRemoteJobStatus(siteMonitorDesignator,remoteJobId)
         if siteIsUnknown(jobSite) and knownSite != "":
            jobSite = knownSite
         if not siteIsUnknown(jobSite):
            jobInfo['recentJobSite'] = jobSite
         return(siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite)

      def writeStatusLine(remoteJobId,jobStage,jobStatusMessage,jobSite):
         # one timestamped status line on stdout, flushed immediately
         if siteIsUnknown(jobSite):
            statusLine = "(%s) %s %s %s\n" % (remoteJobId,jobStage,jobStatusMessage,time.ctime())
         else:
            statusLine = "(%s) %s %s at %s %s\n" % (remoteJobId,jobStage,jobStatusMessage,jobSite,time.ctime())
         sys.stdout.write(statusLine)
         sys.stdout.flush()

      def reportNewState(siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite):
         # log first, then write to stdout; unrecognized status codes get an extra log entry
         jobStatusMessage = self.__getJobStatusMessage(jobStatus)
         if jobStatusMessage == 'Unknown Status':
            log("waitForKilledBatchJobs: siteMonitor= %s,remoteJobId= %s,jobStatus= %s,jobStage= %s,jobSite= %s" % \
                                  (siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite))
         if siteIsUnknown(jobSite):
            log("status:%s %s" % (jobStage,jobStatus))
         else:
            log("status:%s %s %s" % (jobStage,jobStatus,jobSite))
         writeStatusLine(remoteJobId,jobStage,jobStatusMessage,jobSite)

      lastSeenStates = {}   # instance -> (status, stage, site) as of the latest report
      incompleteJobs = 0

      # first pass: every 'K' job is reported once, whatever its state
      for instance in waitForJobsInfo:
         jobInfo = waitForJobsInfo[instance]
         if jobInfo['recentJobStatus'] != 'K':
            continue

         if jobInfo['isBatchJob']:
            siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite = pollJob(jobInfo)
            reportNewState(siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite)
         else:
            # nothing to query for a non-batch job
            jobStatus,jobStage,jobSite = 'D','Job',''

         if jobStatus == 'D':
            jobInfo['recentJobStatus'] = 'KD'
         else:
            incompleteJobs += 1

         lastSeenStates[instance] = (jobStatus,jobStage,jobSite)

      elapsedTime        = 0
      timeLastReported   = elapsedTime
      sleepTime          = minimumDelay
      nSleepsAtThisDelay = 0

      while incompleteJobs > 0:
         nSleepsAtThisDelay += 1
         time.sleep(sleepTime)
         elapsedTime += sleepTime
         if nSleepsAtThisDelay == updateFrequency:
            nSleepsAtThisDelay = 0
            sleepTime = min(2*sleepTime,maximumDelay)

         for instance in waitForJobsInfo:
            jobInfo = waitForJobsInfo[instance]
            # only batch jobs still marked 'K' are polled
            if jobInfo['recentJobStatus'] != 'K' or not jobInfo['isBatchJob']:
               continue

            previousState = lastSeenStates[instance]
            siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite = pollJob(jobInfo)
            currentState = (jobStatus,jobStage,jobSite)

            if currentState != previousState:
               reportNewState(siteMonitorDesignator,remoteJobId,jobStatus,jobStage,jobSite)
               if jobStatus == 'D':
                  jobInfo['recentJobStatus'] = 'KD'
                  incompleteJobs -= 1
               lastSeenStates[instance] = currentState
               # a change restarts both the report timer and the backoff
               timeLastReported   = elapsedTime
               sleepTime          = minimumDelay
               nSleepsAtThisDelay = 0
            elif elapsedTime >= (timeLastReported + maximumReportDelay):
               # nothing new, but stdout has been quiet long enough to repeat the line
               writeStatusLine(remoteJobId,jobStage,self.__getJobStatusMessage(jobStatus),jobSite)
               timeLastReported = elapsedTime


