#
# @package      hubzero-submit-distributor
# @file         RemoteJobMonitor.py
# @author       Steve Clark <clarks@purdue.edu>
# @copyright    Copyright 2004-2011 Purdue University. All rights reserved.
# @license      http://www.gnu.org/licenses/lgpl-3.0.html LGPLv3
#
# Copyright (c) 2004-2011 Purdue University
# All rights reserved.
#
# This file is part of: The HUBzero(R) Platform for Scientific Collaboration
#
# The HUBzero(R) Platform for Scientific Collaboration (HUBzero) is free
# software: you can redistribute it and/or modify it under the terms of
# the GNU Lesser General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# HUBzero is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# HUBzero is a registered trademark of Purdue University.
#
import sys
import time
import traceback

from LogMessage  import logID as log
from MessageCore import MessageCore

class RemoteJobMonitor(MessageCore):
   def __init__(self,
                host,
                port,
                repeatDelay=5,
                fixedBufferSize=64):
      """
      Set up the monitor connection settings.

      host, port       forwarded to MessageCore as listenerHost/listenerPort
      repeatDelay      forwarded unchanged to MessageCore
      fixedBufferSize  stored on the instance; every request method of this
                       class passes it as both buffer-size arguments of the
                       MessageCore request call
      """
      listenerSettings = {'listenerHost':host,
                          'listenerPort':port,
                          'repeatDelay':repeatDelay}
      MessageCore.__init__(self,**listenerSettings)
      self.fixedBufferSize = fixedBufferSize


   def postNewJobSubmission(self,
                            siteMonitorDesignator,
                            remoteJobId):
      """
      Send an "S:<siteMonitorDesignator> <remoteJobId>" request and log the
      number of tries and the response that came back.

      Returns nothing.
      """
      request = "".join(("S:",siteMonitorDesignator," ",remoteJobId))
      bufferSize = self.fixedBufferSize
      attempts,reply = self.requestMessageResponse(request,bufferSize,bufferSize)

      log("confirmation: S(%d):%s" % (attempts,reply))


   def queryRemoteJobStatus(self,
                            siteMonitorDesignator,
                            remoteJobId):
      queryMessage = "Q:" + siteMonitorDesignator + " " + remoteJobId
      nTry,response = self.requestMessageResponse(queryMessage,
                                                  self.fixedBufferSize,
                                                  self.fixedBufferSize)

      if nTry > 1:
         log("confirmation: Q(%d):%s" % (nTry,response))

      jobStatus,jobStage = response.strip().split()
      if ';' in jobStage:
         jobStage,jobSite = jobStage.split(';')
      else:
         jobSite = '?'
      if jobStage == '?':
         jobStage = "Job"

      return(jobStatus,jobStage,jobSite)


   def terminateRemoteJob(self,
                          siteMonitorDesignator,
                          remoteJobId):
      """
      Send a "T:<siteMonitorDesignator> <remoteJobId>" request and log the
      number of tries and the response that came back.

      Returns nothing.
      """
      request = "".join(("T:",siteMonitorDesignator," ",remoteJobId))
      bufferSize = self.fixedBufferSize
      attempts,reply = self.requestMessageResponse(request,bufferSize,bufferSize)

      log("confirmation: T(%d):%s" % (attempts,reply))


   def queryRemoteActiveJobStatus(self,
                                  siteMonitorDesignator,
                                  remoteJobId):
      queryMessage = "R:" + siteMonitorDesignator + " " + remoteJobId
      nTry,report,lastReportTime = self.requestMessageTimestampResponse(queryMessage,
                                                                        self.fixedBufferSize,
                                                                        self.fixedBufferSize)

      if nTry > 1:
         log("confirmation: R(%d):%s" % (nTry,lastReportTime))

      return(float(lastReportTime),report)


   def __getJobStatusMessage(self,
                             jobStatus):
      jobStatusMessages = {}
      jobStatusMessages['N']  = 'Submitted'
      jobStatusMessages['I']  = 'Idle'
      jobStatusMessages['Q']  = 'Queued'
      jobStatusMessages['H']  = 'Held'
      jobStatusMessages['R']  = 'Running'
      jobStatusMessages['C']  = 'Complete'
      jobStatusMessages['SE'] = 'Submission Error'
      jobStatusMessages['X']  = 'Marked For Deletion'
      jobStatusMessages['E']  = 'Exiting'
      jobStatusMessages['T']  = 'Moving'
      jobStatusMessages['W']  = 'Waiting'
      jobStatusMessages['S']  = 'Suspended'
      jobStatusMessages['D']  = 'Done'
      jobStatusMessages['CA'] = 'Cancelled'
      jobStatusMessages['CD'] = 'Completed'
      jobStatusMessages['CG'] = 'Completing'
      jobStatusMessages['F']  = 'Failed'
      jobStatusMessages['NF'] = 'Node_Fail'
      jobStatusMessages['PD'] = 'Pending'
      jobStatusMessages['TO'] = 'Timeout'
      jobStatusMessages['CK'] = 'Checkpointing'
      jobStatusMessages['CP'] = 'Complete Pending'
      jobStatusMessages['DF'] = 'Deferred'
      jobStatusMessages['NQ'] = 'Not Queued'
      jobStatusMessages['NR'] = 'Not Run'
      jobStatusMessages['P']  = 'Pending'
      jobStatusMessages['EP'] = 'Preempt Pending'
      jobStatusMessages['XP'] = 'Reject Pending'
      jobStatusMessages['RM'] = 'Removed'
      jobStatusMessages['RP'] = 'Remove Pending'
      jobStatusMessages['MP'] = 'Resume Pending'
      jobStatusMessages['ST'] = 'Starting'
      jobStatusMessages['TX'] = 'Terminated'
      jobStatusMessages['V']  = 'Vacated'
      jobStatusMessages['VP'] = 'Vacate Pending'
      jobStatusMessages['HS'] = 'User & System Hold'
      jobStatusMessages['PT'] = 'Preempted'
      jobStatusMessages['RJ'] = 'Rejected'
      jobStatusMessages['SH'] = 'System Hold'

      try:
         jobStatusMessage = jobStatusMessages[jobStatus]
      except:
         log(traceback.format_exc())
         jobStatusMessage = 'Unknown Status'

      return(jobStatusMessage)


   def waitForBatchJob(self,
                       siteMonitorDesignator,
                       remoteJobId,
                       knownSite=""):
      """
      Poll the status of one remote job until its status is 'D'.

      Does nothing when remoteJobId is the empty string.  Otherwise the
      status is queried once immediately and then again after every sleep.
      Each change of status, stage or site is logged and written to stdout;
      an unchanged status is rewritten to stdout (not logged) once
      maximumReportDelay seconds of sleep have accumulated since the last
      report.

      knownSite replaces a site reported as "" or '?' when it is not empty.

      Returns nothing.
      """
      if remoteJobId == "":
         return

      minimumDelay = 5       #  5 10 20 40 80 160 320
      maximumDelay = 320
      updateFrequency = 5
      maximumReportDelay = 320
      unknownSites = ("",'?')

      def pollJob():
         # query the job, falling back to knownSite when no site is reported
         status,stage,site = self.queryRemoteJobStatus(siteMonitorDesignator,remoteJobId)
         if site in unknownSites and knownSite != "":
            site = knownSite
         return(status,stage,site)

      def announce(state,logged):
         # write one status line to stdout, optionally preceded by a log entry
         status,stage,site = state
         message = self.__getJobStatusMessage(status)
         if site in unknownSites:
            if logged:
               log("status:%s %s" % (stage,status))
            line = "(%s) %s %s %s\n" % (remoteJobId,stage,message,time.ctime())
         else:
            if logged:
               log("status:%s %s %s" % (stage,status,site))
            line = "(%s) %s %s at %s %s\n" % (remoteJobId,stage,message,site,time.ctime())
         sys.stdout.write(line)
         sys.stdout.flush()

      elapsed = 0              # seconds slept so far
      pause = minimumDelay     # current sleep interval
      pollsAtPause = 0         # polls done at the current interval
      lastReportAt = elapsed

      state = pollJob()
      announce(state,True)
      lastSeen = state

      while state[0] != 'D':
         pollsAtPause += 1
         time.sleep(pause)
         elapsed += pause
         # back off: double the interval after updateFrequency quiet polls
         if pollsAtPause == updateFrequency:
            pollsAtPause = 0
            pause = min(2*pause,maximumDelay)

         state = pollJob()
         if state != lastSeen:
            announce(state,True)
            lastSeen = state
            lastReportAt = elapsed
            # something happened, return to the fastest polling rate
            pause = minimumDelay
            pollsAtPause = 0
         elif elapsed >= lastReportAt + maximumReportDelay:
            announce(state,False)
            lastReportAt = elapsed


   def waitForBatchJobs(self,
                        waitForJobsInfo,
                        abortGlobal):
      """
      Poll a set of jobs until at least one of them reaches status 'D'.

      waitForJobsInfo  mapping of trial -> job record.  The keys read from a
                       record are 'recentJobStatus', 'isBatchJob',
                       'siteMonitorDesignator', 'remoteJobId' and 'knownSite'.
                       'recentJobStatus' is updated in place.
      abortGlobal      mapping whose 'abortAttempted' entry stops the wait
                       when it becomes true

      Records already marked 'D' are skipped.  Records that are not batch
      jobs are marked 'D' right away.  Polling continues while no job has
      completed, jobs are still outstanding and no abort was attempted.

      Returns the list of trials that reached 'D' during this call.

      NOTE(review): the report timer is shared by all jobs, so once
      maximumReportDelay has elapsed only the first unchanged job visited in
      a pass is rewritten to stdout - confirm this is intended.
      """
      minimumDelay = 5       #  5 10 20 40 80 160 320
      maximumDelay = 320
      updateFrequency = 5
      maximumReportDelay = 320
      unknownSites = ("",'?')

      def pollJob(designator,jobId,fallbackSite):
         # query one job, falling back to its known site when none is reported
         status,stage,site = self.queryRemoteJobStatus(designator,jobId)
         if site in unknownSites and fallbackSite != "":
            site = fallbackSite
         return(status,stage,site)

      def announce(jobId,state,logged):
         # write one status line to stdout, optionally preceded by a log entry
         status,stage,site = state
         message = self.__getJobStatusMessage(status)
         if site in unknownSites:
            if logged:
               log("status:%s %s" % (stage,status))
            line = "(%s) %s %s %s\n" % (jobId,stage,message,time.ctime())
         else:
            if logged:
               log("status:%s %s %s" % (stage,status,site))
            line = "(%s) %s %s at %s %s\n" % (jobId,stage,message,site,time.ctime())
         sys.stdout.write(line)
         sys.stdout.flush()

      completeRemoteJobIndexes = []
      lastSeen = {}            # trial -> (status,stage,site) last reported
      incompleteJobs = 0

      # first pass: establish the starting state of every unfinished job
      for trial in waitForJobsInfo:
         jobInfo = waitForJobsInfo[trial]
         if jobInfo['recentJobStatus'] == 'D':
            continue
         if jobInfo['isBatchJob']:
            designator   = jobInfo['siteMonitorDesignator']
            jobId        = jobInfo['remoteJobId']
            fallbackSite = jobInfo['knownSite']
            state = pollJob(designator,jobId,fallbackSite)
            announce(jobId,state,True)
         else:
            state = ('D','Job','')

         jobInfo['recentJobStatus'] = state[0]
         if state[0] == 'D':
            completeRemoteJobIndexes.append(trial)
         else:
            incompleteJobs += 1
         lastSeen[trial] = state

      elapsed = 0              # seconds slept so far
      pause = minimumDelay     # current sleep interval
      pollsAtPause = 0         # polls done at the current interval
      lastReportAt = elapsed

      while not completeRemoteJobIndexes and incompleteJobs > 0 and not abortGlobal['abortAttempted']:
         pollsAtPause += 1
         time.sleep(pause)
         elapsed += pause
         # back off: double the interval after updateFrequency quiet polls
         if pollsAtPause == updateFrequency:
            pollsAtPause = 0
            pause = min(2*pause,maximumDelay)

         for trial in waitForJobsInfo:
            jobInfo = waitForJobsInfo[trial]
            if jobInfo['recentJobStatus'] == 'D' or not jobInfo['isBatchJob']:
               continue
            designator   = jobInfo['siteMonitorDesignator']
            jobId        = jobInfo['remoteJobId']
            fallbackSite = jobInfo['knownSite']
            previousState = lastSeen[trial]
            state = pollJob(designator,jobId,fallbackSite)
            if state != previousState:
               announce(jobId,state,True)
               jobInfo['recentJobStatus'] = state[0]
               if state[0] == 'D':
                  completeRemoteJobIndexes.append(trial)
                  incompleteJobs -= 1
               lastSeen[trial] = state
               lastReportAt = elapsed
               # something happened, return to the fastest polling rate
               pause = minimumDelay
               pollsAtPause = 0
            elif elapsed >= lastReportAt + maximumReportDelay:
               announce(jobId,state,False)
               lastReportAt = elapsed

      log("waitForBatchJobs: nCompleteRemoteJobIndexes = %d, nIncompleteJobs = %d, abortGlobal = %s" % \
                    (len(completeRemoteJobIndexes),incompleteJobs,abortGlobal['abortAttempted']))

      return(completeRemoteJobIndexes)


   def waitForKilledBatchJobs(self,
                              waitForJobsInfo):
      """
      Poll every job whose 'recentJobStatus' is 'K' until it reports 'D'.

      waitForJobsInfo  mapping of trial -> job record.  The keys read from a
                       record are 'recentJobStatus', 'isBatchJob',
                       'siteMonitorDesignator', 'remoteJobId' and 'knownSite'.
                       'recentJobStatus' is changed from 'K' to 'KD' in place
                       once the job reports 'D'.

      Records marked 'K' that are not batch jobs are set to 'KD' right away.
      A status with no entry in the status message table is logged with its
      full job details whenever a change is reported.

      Returns nothing.

      NOTE(review): the loop has no abort path, it ends only when every
      polled job reports 'D'.  The report timer is shared by all jobs, so
      only the first unchanged job visited in a pass is rewritten to stdout
      once maximumReportDelay has elapsed - confirm both are intended.
      """
      minimumDelay = 5       #  5 10 20 30 (doubling is capped by maximumDelay)
      maximumDelay = 30
      updateFrequency = 5
      maximumReportDelay = 30
      unknownSites = ("",'?')

      def pollJob(designator,jobId,fallbackSite):
         # query one job, falling back to its known site when none is reported
         status,stage,site = self.queryRemoteJobStatus(designator,jobId)
         if site in unknownSites and fallbackSite != "":
            site = fallbackSite
         return(status,stage,site)

      def announce(designator,jobId,state,logged):
         # write one status line to stdout, optionally preceded by log entries
         status,stage,site = state
         message = self.__getJobStatusMessage(status)
         if logged and message == 'Unknown Status':
            log("waitForKilledBatchJobs: siteMonitor= %s,remoteJobId= %s,jobStatus= %s,jobStage= %s,jobSite= %s" % \
                                  (designator,jobId,status,stage,site))
         if site in unknownSites:
            if logged:
               log("status:%s %s" % (stage,status))
            line = "(%s) %s %s %s\n" % (jobId,stage,message,time.ctime())
         else:
            if logged:
               log("status:%s %s %s" % (stage,status,site))
            line = "(%s) %s %s at %s %s\n" % (jobId,stage,message,site,time.ctime())
         sys.stdout.write(line)
         sys.stdout.flush()

      lastSeen = {}            # trial -> (status,stage,site) last reported
      incompleteJobs = 0

      # first pass: establish the starting state of every killed job
      for trial in waitForJobsInfo:
         jobInfo = waitForJobsInfo[trial]
         if jobInfo['recentJobStatus'] != 'K':
            continue
         if jobInfo['isBatchJob']:
            designator   = jobInfo['siteMonitorDesignator']
            jobId        = jobInfo['remoteJobId']
            fallbackSite = jobInfo['knownSite']
            state = pollJob(designator,jobId,fallbackSite)
            announce(designator,jobId,state,True)
         else:
            state = ('D','Job','')

         if state[0] == 'D':
            jobInfo['recentJobStatus'] = 'KD'
         else:
            incompleteJobs += 1
         lastSeen[trial] = state

      elapsed = 0              # seconds slept so far
      pause = minimumDelay     # current sleep interval
      pollsAtPause = 0         # polls done at the current interval
      lastReportAt = elapsed

      while incompleteJobs > 0:
         pollsAtPause += 1
         time.sleep(pause)
         elapsed += pause
         # back off: double the interval after updateFrequency quiet polls
         if pollsAtPause == updateFrequency:
            pollsAtPause = 0
            pause = min(2*pause,maximumDelay)

         for trial in waitForJobsInfo:
            jobInfo = waitForJobsInfo[trial]
            if jobInfo['recentJobStatus'] != 'K' or not jobInfo['isBatchJob']:
               continue
            designator   = jobInfo['siteMonitorDesignator']
            jobId        = jobInfo['remoteJobId']
            fallbackSite = jobInfo['knownSite']
            previousState = lastSeen[trial]
            state = pollJob(designator,jobId,fallbackSite)
            if state != previousState:
               announce(designator,jobId,state,True)
               if state[0] == 'D':
                  jobInfo['recentJobStatus'] = 'KD'
                  incompleteJobs -= 1
               lastSeen[trial] = state
               lastReportAt = elapsed
               # something happened, return to the fastest polling rate
               pause = minimumDelay
               pollsAtPause = 0
            elif elapsed >= lastReportAt + maximumReportDelay:
               announce(designator,jobId,state,False)
               lastReportAt = elapsed

