#!/usr/bin/env python3
#
# @package      hubzero-submit-monitors
# @file         monitorJobSiteSQL.py
# @copyright    Copyright (c) 2014-2020 The Regents of the University of California.
# @license      http://opensource.org/licenses/MIT MIT
#
# Copyright (c) 2014-2020 The Regents of the University of California.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#
# HUBzero is a registered trademark of The Regents of the University of California.
#
# ----------------------------------------------------------------------
#  monitorJobSiteSQL.py
#
#  script which launches a single remote monitor and
#  feeds job status updates to the central job monitor.
#
import sys
import os
import re
import subprocess
import shlex
import select
import signal
import socket
import time
import json
import base64
import traceback
import logging
from errno import EPIPE,ECONNRESET,EINTR

from hubzero.submit.LogMessage            import getLogPIDMessage as getLogMessage
from hubzero.submit.JobInfo               import JobInfo
from hubzero.submit.DaemonsInfo           import DaemonsInfo
from hubzero.submit.InfosInfo             import InfosInfo
from hubzero.submit.MonitorsInfo          import MonitorsInfo
from hubzero.submit.RemoteTunnelMonitor   import RemoteTunnelMonitor
from hubzero.submit.RemoteIdentityManager import RemoteIdentityManager
from hubzero.submit.MySQLDatabase         import MySQLDatabase as MyDatabase

# Root directory of the submit installation; BINDIRECTORY is derived from it.
MONITORROOT = os.path.join(os.sep,'opt','submit')

# Log directory and log file name for this monitor
# (presumably handed to openLogger() by the entry point - not visible here).
MONITORLOGLOCATION = os.path.join(os.sep,'var','log','submit','monitors')
MONITORLOGFILENAME = "monitorJobSiteSQL.log"
# The root logger; openLogger() attaches the log file handler to it.
APPLICATIONLOGGER  = logging.getLogger('')

# Directory holding the configuration files and the names of the files in it.
CONFIGURATIONDIRECTORY   = os.path.join(os.sep,'etc','submit')
MONITORCONFIGURATIONFILE = 'jobmonitor.conf'
DAEMONSCONFIGURATIONFILE = 'daemons.conf'
INFOSCONFIGURATIONFILE   = 'infos.conf'

BINDIRECTORY = os.path.join(MONITORROOT,'bin')
# User whose identities are requested from the identity manager
# (see MonitorJobSite.buildSSHCommand).
IDENTITYUSER = 'gridman'

# Timeout, in seconds, given to select.select() in MonitorJobSite.monitorSite().
ACTIVITYUPDATEINTERVAL = 15.
# NOTE(review): the NOOP intervals are not used in the visible part of the file;
# presumably they are also expressed in seconds - confirm against monitorSite().
NOOPCHECKINTERVAL      = 60
NOOPINACTIVITYINTERVAL = 30*NOOPCHECKINTERVAL

def openLogger(logDirectory,
               hubLogFile):
   """
   Attach a file handler to the application logger.

   The application logger level is set to DEBUG.  Records whose message is
   the empty string are rejected by the handler.

   Args:
      logDirectory: directory that contains the log file.
      hubLogFile: name of the log file inside logDirectory.

   Returns:
      The file descriptor of the stream opened by the file handler.
   """
   class NonEmptyMessageFilter(logging.Filter):
      """Pass only records whose message is not the empty string."""

      def filter(self,record):
         return(record.getMessage() != "")

   APPLICATIONLOGGER.setLevel(logging.DEBUG)

   fileHandler = logging.FileHandler(os.path.join(logDirectory,hubLogFile))
   logFileDescriptor = fileHandler.stream.fileno()

   fileHandler.addFilter(NonEmptyMessageFilter())
   fileHandler.setFormatter(logging.Formatter('%(asctime)s %(message)s',
                                              '%s [%a %b %d %H:%M:%S %Y]'))
   APPLICATIONLOGGER.addHandler(fileHandler)

   return(logFileDescriptor)


def daemonize(fdLogFile):
   """
   Detach the process from its parent using two successive forks.

   Unless fdLogFile already is the stdout descriptor, stdin is redirected to
   /dev/null and stdout/stderr are redirected to fdLogFile.  The original
   process waits for its child and exits with status 0, the intermediate
   child starts a new session, forks again and exits with status 0.  Only the
   final child returns from this function, after sleeping for two seconds.

   Args:
      fdLogFile: file descriptor that should receive stdout and stderr.
   """
   if fdLogFile != sys.stdout.fileno():
      try:
         # The context manager closes the temporary /dev/null descriptor as soon
         # as it has been duplicated onto stdin, rather than leaving the close
         # to garbage collection.
         with open("/dev/null",'r') as devnull:
            try:
               os.dup2(devnull.fileno(),sys.stdin.fileno())
               os.dup2(fdLogFile,sys.stdout.fileno())
               os.dup2(fdLogFile,sys.stderr.fileno())
            except OSError:
               APPLICATIONLOGGER.log(logging.ERROR,getLogMessage("file descriptor dup failed"))
      except (IOError,OSError):
         APPLICATIONLOGGER.log(logging.ERROR,getLogMessage("%s could not be opened" % ("/dev/null")))

   if os.fork() != 0:
      # original process: reap the intermediate child, then leave
      os.wait()
      sys.exit(0)
   else:
      os.setsid()
      pid = os.fork()
      if pid != 0:
         # intermediate child: exits immediately, leaving only the final child
         sys.exit(0)

   time.sleep(2)


class MonitorJobSite:
   def __init__(self,
                configurationDirectory,
                monitorConfigurationFile,
                daemonsConfigurationFile,
                infosConfigurationFile,
                identityUser,
                siteDesignator):
      """
      Record the configuration locations, look up the daemon listen URIs and
      install the signal handlers.

      Args:
         configurationDirectory: directory containing the configuration files.
         monitorConfigurationFile: file name of the monitor configuration.
         daemonsConfigurationFile: file name of the daemons configuration.
         infosConfigurationFile: file name of the infos configuration.
         identityUser: user whose identities are requested in buildSSHCommand().
         siteDesignator: designator of the site handled by this monitor.
      """
      self.logger = logging.getLogger(__name__)

      self.configurationDirectory   = configurationDirectory
      self.monitorConfigFilePath    = os.path.join(configurationDirectory,monitorConfigurationFile)
      self.daemonsConfigurationFile = daemonsConfigurationFile
      self.infosConfigurationFile   = infosConfigurationFile

      # listen URIs of the tunnel monitor and identity manager daemons
      daemonsInfo = DaemonsInfo(os.path.join(self.configurationDirectory,
                                             self.daemonsConfigurationFile))
      self.tunnelListenURI   = daemonsInfo.getDaemonListenURI('tunnelMonitor','tcp')
      self.identityListenURI = daemonsInfo.getDaemonListenURI('identitiesManager','tcp')

      self.identityUser   = identityUser
      self.siteDesignator = siteDesignator

      # filled in by configure()
      self.configData = {}
      self.myDatabase = None

      self.bufferSize              = 4096
      self.siteMonitorPid          = 0
      self.maximumJobsPushedToSite = 50

      # created in monitorSite()
      self.infosInfo             = None
      self.monitorsInfo          = None
      self.remoteTunnelMonitor   = None
      self.remoteIdentityManager = None

      # filled in by buildSSHCommand()
      self.sshCommand       = []
      self.tunnelDesignator = ""

      self.exitSystemCallOnInterrupt = False

      signalHandlers = ((signal.SIGINT, self.sigINT_handler),
                        (signal.SIGHUP, self.sigHUP_handler),
                        (signal.SIGQUIT,self.sigQUIT_handler),
                        (signal.SIGABRT,self.sigABRT_handler),
                        (signal.SIGTERM,self.sigTERM_handler))
      for signalNumber,signalHandler in signalHandlers:
         signal.signal(signalNumber,signalHandler)


   def configure(self):
      """
      Read the [monitor] section of the monitor configuration file.

      '#' comments are stripped from every line.  Inside the [monitor] section
      each "key = value" pair whose key is already present in self.configData
      replaces the default, converted to the type of that default.  Pairs with
      an unknown key are reported with a warning and ignored.

      Returns:
         True when the file was read through to its end and mysqlHost,
         mysqlUser, mysqlPassword and mysqlDB are all non-empty; in that case
         self.myDatabase is created.  False otherwise.

      Raises:
         ValueError: when a value cannot be converted to the type of its default.
      """
      # raw strings keep the regex escapes (\s, \[, \w) away from the Python parser
      sectionPattern  = re.compile(r'(\s*\[)([^\s]*)(]\s*)')
      keyValuePattern = re.compile(r'( *)(\w*)( *= *)(.*[^\s$])( *)')
      commentPattern  = re.compile(r'\s*#.*')
      inMonitorSection = False

      configured = False
      try:
         fpConfig = open(self.monitorConfigFilePath,'r')
         try:
            for record in fpConfig:
               record = commentPattern.sub("",record)

               sectionMatch = sectionPattern.match(record)
               if sectionMatch:
                  inMonitorSection = (sectionMatch.group(2) == 'monitor')
                  if inMonitorSection:
                     self.configData = {'mysqlHost':"",
                                        'mysqlUser':"",
                                        'mysqlPassword':"",
                                        'mysqlDB':""
                                       }
                  continue

               if not inMonitorSection:
                  continue

               keyValueMatch = keyValuePattern.match(record)
               if not keyValueMatch:
                  continue

               key,value = keyValueMatch.group(2,4)
               if key not in self.configData:
                  self.logger.log(logging.WARNING,getLogMessage("Undefined key = value pair %s = %s" % (key,value)))
                  continue

               defaultValue = self.configData[key]
               if   isinstance(defaultValue,list):
                  self.configData[key] = [e.strip() for e in value.split(',')]
               elif isinstance(defaultValue,bool):
                  self.configData[key] = bool(value.lower() == 'true')
               elif isinstance(defaultValue,float):
                  self.configData[key] = float(value)
               elif isinstance(defaultValue,int):
                  self.configData[key] = int(value)
               elif isinstance(defaultValue,dict):
                  # An existing entry, when there is one, decides how the
                  # configured keys and values are typed.
                  if defaultValue:
                     sampleKey   = next(iter(defaultValue))
                     sampleValue = defaultValue[sampleKey]
                  else:
                     sampleKey   = "key"
                     sampleValue = "value"
                  configuredDict = {}
                  for e in value.split(','):
                     dictKey,dictValue = e.split(':')
                     if isinstance(sampleKey,int):
                        dictKey = int(dictKey)
                     # bool is tested first because bool is a subclass of int
                     if   isinstance(sampleValue,bool):
                        dictValue = bool(dictValue.lower() == 'true')
                     elif isinstance(sampleValue,int):
                        dictValue = int(dictValue)
                     elif isinstance(sampleValue,float):
                        dictValue = float(dictValue)
                     configuredDict[dictKey] = dictValue
                  self.configData[key] = configuredDict
               else:
                  self.configData[key] = value

            # reaching this point means the whole file was read
            configured = True
         except (IOError,OSError):
            self.logger.log(logging.ERROR,getLogMessage("%s could not be read" % (self.monitorConfigFilePath)))
         finally:
            fpConfig.close()
      except (IOError,OSError):
         self.logger.log(logging.ERROR,getLogMessage("%s could not be opened" % (self.monitorConfigFilePath)))

      if configured:
         # .get() covers a file without a [monitor] section, in which case
         # self.configData never received its default keys.
         requiredKeys = ('mysqlHost','mysqlUser','mysqlPassword','mysqlDB')
         if any(self.configData.get(requiredKey,"") == "" for requiredKey in requiredKeys):
            self.logger.log(logging.ERROR,getLogMessage("MySQL information missing from %s" % (self.monitorConfigFilePath)))
            configured = False
         else:
            self.myDatabase = MyDatabase(self.configData['mysqlHost'],self.configData['mysqlUser'],self.configData['mysqlPassword'])

      return(configured)


   def terminate(self):
      if self.siteMonitorPid:
         self.logger.log(logging.INFO,getLogMessage("Send TERM to child ssh process"))
         try:
            os.kill(self.siteMonitorPid,signal.SIGTERM)
         except:
            pass


   def sigGEN_handler(self,
                      signalNumber,
                      frame):
      self.terminate()
      if self.exitSystemCallOnInterrupt:
         if sys.version_info > (3,):
            raise InterruptedError(EINTR,"System Call Interrupt")


   def sigINT_handler(self,
                      signalNumber,
                      frame):
      """Log the receipt of SIGINT, then run the common signal handling."""
      self.logger.info(getLogMessage("Received SIGINT!"))
      self.sigGEN_handler(signalNumber,frame)


   def sigHUP_handler(self,
                      signalNumber,
                      frame):
      """Log the receipt of SIGHUP, then run the common signal handling."""
      self.logger.info(getLogMessage("Received SIGHUP!"))
      self.sigGEN_handler(signalNumber,frame)


   def sigQUIT_handler(self,
                       signalNumber,
                       frame):
      """Log the receipt of SIGQUIT, then run the common signal handling."""
      self.logger.info(getLogMessage("Received SIGQUIT!"))
      self.sigGEN_handler(signalNumber,frame)


   def sigABRT_handler(self,
                       signalNumber,
                       frame):
      """Log the receipt of SIGABRT, then run the common signal handling."""
      self.logger.info(getLogMessage("Received SIGABRT!"))
      self.sigGEN_handler(signalNumber,frame)


   def sigTERM_handler(self,
                       signalNumber,
                       frame):
      """Log the receipt of SIGTERM, then run the common signal handling."""
      self.logger.info(getLogMessage("Received SIGTERM!"))
      self.sigGEN_handler(signalNumber,frame)


   def buildSSHCommand(self):
      """
      Assemble the ssh command line that starts the remote monitor.

      The monitor description of self.siteDesignator is taken from
      self.monitorsInfo.  When it names a remote monitor command and its venue
      mechanism is 'ssh', the argument list is built and stored, together with
      the tunnel designator, in self.sshCommand and self.tunnelDesignator.
      When a tunnel designator is configured the connection goes to the address
      and port reported by self.remoteTunnelMonitor, otherwise directly to the
      venue.

      Returns:
         0 when monitor information was found and no error occurred,
         1 when no monitor information exists for the site,
         2 when assembling the command failed (the error is logged).
      """
      exitStatus       = 1
      sshCommand       = []
      tunnelDesignator = ""

      monitorInfo = self.monitorsInfo.getMonitorInfo(self.siteDesignator)
      if monitorInfo:
         exitStatus = 0
         if monitorInfo['remoteMonitorCommand']:
            identityPaths = self.remoteIdentityManager.queryUserIdentities(monitorInfo['identityManager'],
                                                                           self.identityUser)
            try:
               monitorName = monitorInfo['monitorName']
               if monitorInfo['venueMechanism'] == 'ssh':
                  venue                = monitorInfo['venue']
                  venuePort            = monitorInfo['venuePort']
                  sshOptions           = monitorInfo['sshOptions']
                  tunnelDesignator     = monitorInfo['tunnelDesignator']
                  user                 = monitorInfo['remoteUser']
                  remoteMonitorCommand = monitorInfo['remoteMonitorCommand']
                  sshIdentity          = identityPaths['communitySSH']

                  sshCommand = ["ssh","-T","-x","-a"]
                  if sshOptions:
                     sshCommand += sshOptions.split()
                  sshCommand += ["-i",sshIdentity]

                  if tunnelDesignator == "":
                     sshHost = venue
                     sshPort = "%d" % (venuePort)
                  else:
                     tunnelAddress,tunnelPort = self.remoteTunnelMonitor.getTunnelAddressPort(tunnelDesignator)
                     sshHost = tunnelAddress
                     # every element of an argument list must be a string,
                     # whatever type the tunnel monitor reports the port as
                     sshPort = str(tunnelPort)

                  sshCommand += ["-p",sshPort,
                                 "%s@%s" % (user,sshHost),
                                 remoteMonitorCommand,
                                 monitorName]
            except Exception:
               exitStatus = 2
               self.logger.log(logging.ERROR,getLogMessage("Build sshCommand failed for %s" % (self.siteDesignator)))
               self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
            else:
               self.sshCommand       = sshCommand
               self.tunnelDesignator = tunnelDesignator

      return(exitStatus)


   # Values of the pendingJobs.state column.  getNewRemoteJobIds() moves the
   # rows of its site from CREATED to SELECTED before reading and deleting them.
   JOBPENDINGSTATECREATED  = 1 << 0
   JOBPENDINGSTATESELECTED = 1 << 1


   def getNewRemoteJobIds(self):
      """
      Collect the pending jobs of this site and remove them from pendingJobs.

      The pendingJobs rows of the site are switched from the CREATED to the
      SELECTED state, read together with their fileTailings entries, deleted
      and the transaction is committed.  Database errors are logged.

      Returns:
         A list with one 'newJobId' message dictionary per selected job.  The
         list is empty when the database connection could not be established
         or when the state update or the job query failed.
      """
      newRemoteJobIds = []
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return(newRemoteJobIds)

      try:
         sqlCommand = "UPDATE pendingJobs SET state=%s WHERE(state=%s AND siteDesignator=%s)"
         sqlParameters = (self.JOBPENDINGSTATESELECTED,self.JOBPENDINGSTATECREATED,self.siteDesignator)
         try:
            self.myDatabase.update(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("getNewRemoteJobIds:myDatabase.update(pendingJobs) failed"))
         else:
            sqlCommand = "SELECT siteDesignator,remoteJobId,jobWorkDirectory,localJobId,instanceId,runName \
                            FROM pendingJobs WHERE(state=%s AND siteDesignator=%s)"
            sqlParameters = (self.JOBPENDINGSTATESELECTED,self.siteDesignator)
            try:
               result = self.myDatabase.select(sqlCommand,sqlParameters)
            except Exception:
               self.logger.log(logging.ERROR,getLogMessage("getNewRemoteJobIds:myDatabase.select(pendingJobs) failed"))
               self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
            else:
               for row in result:
                  jsonObject = {'messageType':'newJobId',
                                'siteDesignator':row[0],
                                'remoteJobId':row[1],
                                'jobWorkDirectory':row[2],
                                'localJobId':row[3],
                                'instanceId':row[4],
                                'runName':row[5],
                                'tailFiles':{}}

                  sqlCommand = "SELECT fileName,nLines FROM fileTailings WHERE(siteDesignator=%s AND remoteJobId=%s)"
                  sqlParameters = (self.siteDesignator,row[1])
                  try:
                     tailResult = self.myDatabase.select(sqlCommand,sqlParameters)
                  except Exception:
                     # the job is still reported, just without tail files
                     self.logger.log(logging.ERROR,getLogMessage("getNewRemoteJobIds:myDatabase.select(fileTailings) failed"))
                     self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
                  else:
                     for tailRow in tailResult:
                        jsonObject['tailFiles'][tailRow[0]] = {'nLines':tailRow[1]}

                  newRemoteJobIds.append(jsonObject)

               sqlCommand = "DELETE FROM pendingJobs WHERE(state=%s AND siteDesignator=%s)"
               sqlParameters = (self.JOBPENDINGSTATESELECTED,self.siteDesignator)
               try:
                  self.myDatabase.delete(sqlCommand,sqlParameters)
               except Exception:
                  self.logger.log(logging.ERROR,getLogMessage("getNewRemoteJobIds:myDatabase.delete(pendingJobs) failed"))
                  self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))

               self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()

      return(newRemoteJobIds)


   def getOldRemainingRemoteJobIds(self):
      """
      List the activeJobs rows of this site whose status is none of
      'Dr', 'D', 'N' and '?'.

      Database errors are logged.

      Returns:
         A list with one 'orphanJobId' message dictionary per matching row.
         The list is empty when the connection or the query failed.
      """
      oldRemainingRemoteJobIds = []
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return(oldRemainingRemoteJobIds)

      try:
         sqlCommand = "SELECT localJobId,instanceId,runName,remoteJobId \
                         FROM activeJobs WHERE(siteDesignator=%s AND \
                                               jobStatus!='Dr' AND jobStatus!='D' AND jobStatus!='N' AND jobStatus!='?')"
         sqlParameters = (self.siteDesignator,)
         try:
            result = self.myDatabase.select(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("getOldRemainingRemoteJobIds:myDatabase.select(activeJobs) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            for row in result:
               jsonObject = {'messageType':'orphanJobId',
                             'siteDesignator':self.siteDesignator,
                             'localJobId':row[0],
                             'instanceId':row[1],
                             'runName':row[2],
                             'remoteJobId':row[3]}
               oldRemainingRemoteJobIds.append(jsonObject)
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()

      return(oldRemainingRemoteJobIds)


   def getOrphanRemoteJobIds(self):
      """
      List the activeJobs rows of this site that have had status 'N' or '?'
      for more than ten minutes.

      Rows with status 'N' are reported first, followed by rows with status
      '?'.  Database errors are logged.

      Returns:
         A list with one 'orphanJobId' message dictionary per matching row.
         The list is empty when the connection failed.
      """
      orphanRemoteJobIds = []
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return(orphanRemoteJobIds)

      try:
         # rows recorded before this moment count as orphaned
         timeOrphaned = time.time()-60*10
         sqlCommand = "SELECT localJobId,instanceId,runName,remoteJobId \
                         FROM activeJobs WHERE(siteDesignator=%s AND jobStatus=%s AND timeRecorded<%s)"
         for jobStatus in ('N','?'):
            sqlParameters = (self.siteDesignator,jobStatus,timeOrphaned)
            try:
               result = self.myDatabase.select(sqlCommand,sqlParameters)
            except Exception:
               self.logger.log(logging.ERROR,getLogMessage("getOrphanRemoteJobIds:myDatabase.select(activeJobs) failed"))
               self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
            else:
               for row in result:
                  jsonObject = {'messageType':'orphanJobId',
                                'siteDesignator':self.siteDesignator,
                                'localJobId':row[0],
                                'instanceId':row[1],
                                'runName':row[2],
                                'remoteJobId':row[3]}
                  orphanRemoteJobIds.append(jsonObject)
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()

      return(orphanRemoteJobIds)


   def isJobSitePending(self):
      """
      Tell whether pendingJobSites contains an entry for this site.

      Database errors are logged.

      Returns:
         True when the row count for the site is greater than zero.  False
         otherwise, including when the connection or the query failed.
      """
      jobSitePending = False
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return(jobSitePending)

      try:
         sqlCommand = "SELECT COUNT(*) FROM pendingJobSites WHERE(jobSite=%s)"
         sqlParameters = (self.siteDesignator,)
         try:
            result = self.myDatabase.select(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("isJobSitePending:myDatabase.select(pendingJobSites) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            if len(result) == 1:
               nJobSitePending = result[0][0]
               if nJobSitePending > 0:
                  jobSitePending = True
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()

      return(jobSitePending)


   def deletePendingJobSite(self):
      """
      Delete the pendingJobSites rows of this site and commit.

      Nothing is done when the database connection cannot be established.  A
      failed delete is logged and not committed.
      """
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return

      try:
         sqlCommand = "DELETE FROM pendingJobSites WHERE(jobSite=%s)"
         sqlParameters = (self.siteDesignator,)
         try:
            self.myDatabase.delete(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("deletePendingJobSite:myDatabase.delete(pendingJobSites) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()


   def addActiveJobSite(self,
                        pid):
      """
      Insert a row for this site into activeJobSites and commit.

      The row holds the site designator, pid and the current time.  Nothing is
      done when the database connection cannot be established.  A failed
      insert is logged and not committed.

      Args:
         pid: process id stored with the site.
      """
      now = time.time()
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return

      try:
         sqlCommand = "INSERT INTO activeJobSites (jobSite,pid,timeUpdated) \
                              VALUES(%s,%s,%s)"
         sqlParameters = (self.siteDesignator,pid,now)
         try:
            self.myDatabase.insert(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("addActiveJobSite:myDatabase.insert(activeJobSites) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()


   def deleteActiveJobSite(self):
      """
      Delete the activeJobSites rows of this site and commit.

      Nothing is done when the database connection cannot be established.  A
      failed delete is logged and not committed.
      """
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return

      try:
         sqlCommand = "DELETE FROM activeJobSites WHERE(jobSite=%s)"
         sqlParameters = (self.siteDesignator,)
         try:
            self.myDatabase.delete(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("deleteActiveJobSite:myDatabase.delete(activeJobSites) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()


   def addRestartJobSite(self):
      """
      Insert a row for this site, stamped with the current time, into
      restartJobSites and commit.

      Nothing is done when the database connection cannot be established.  A
      failed insert is logged and not committed.
      """
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return

      try:
         timeUpdated = time.time()
         sqlCommand = "INSERT INTO restartJobSites (jobSite,timeUpdated) \
                            VALUES(%s,%s)"
         sqlParameters = (self.siteDesignator,timeUpdated)
         try:
            self.myDatabase.insert(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("addRestartJobSite:myDatabase.insert(restartJobSites) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()


   def updateActiveJobSite(self):
      """
      Set timeUpdated of this site's activeJobSites row to the current time
      and commit.

      Nothing is done when the database connection cannot be established.  A
      failed update is logged and not committed.
      """
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return

      try:
         now = time.time()
         sqlCommand = "UPDATE activeJobSites SET timeUpdated=%s WHERE(jobSite=%s)"
         sqlParameters = (now,self.siteDesignator)
         try:
            self.myDatabase.update(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("updateActiveJobSite:myDatabase.update(activeJobSites) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()


   def updateWFJobs(self):
      """
      Re-evaluate the activeJobs rows of this site that have status 'WF'.

      For every such row the status of the row with the same localJobId and
      instanceId 0 is looked up:
         - exactly one row with status 'D' or 'Dr': that status is adopted,
         - exactly one row with any other status: the job stays 'WF',
         - no row or several rows: the job becomes 'Dr'.
      Rows whose status changed are updated with the current time.  The
      transaction is committed at the end; database errors are logged.
      """
      if not self.myDatabase.connect(self.configData['mysqlDB']):
         return

      try:
         sqlCommand = "SELECT siteDesignator,remoteJobId,localJobId, \
                              instanceId,jobStatus,jobStage,jobQueue,executionHost \
                         FROM activeJobs WHERE(siteDesignator=%s AND jobStatus='WF')"
         sqlParameters = (self.siteDesignator,)
         try:
            wfJobs = self.myDatabase.select(sqlCommand,sqlParameters)
         except Exception:
            self.logger.log(logging.ERROR,getLogMessage("updateWFJobs:myDatabase.select(activeJobs) failed"))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            timeRecorded = time.time()
            for wfJob in wfJobs:
               siteDesignator = wfJob[0]
               remoteJobId    = wfJob[1]
               localJobId     = wfJob[2]
               jobStatus      = wfJob[4]
               jobStage       = wfJob[5]
               jobQueue       = wfJob[6]
               executionHost  = wfJob[7]

               # The lookup result gets its own names so the rows being
               # iterated are not rebound inside the loop.
               sqlCommand = "SELECT jobStatus FROM activeJobs WHERE(localJobId=%s AND instanceId=0)"
               sqlParameters = (localJobId,)
               try:
                  instanceZeroRows = self.myDatabase.select(sqlCommand,sqlParameters)
               except Exception:
                  self.logger.log(logging.ERROR,getLogMessage("updateWFJobs:myDatabase.select(activeJobs instanceId=0) failed"))
                  self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
               else:
                  if len(instanceZeroRows) == 1:
                     instanceZeroStatus = instanceZeroRows[0][0]
                     if instanceZeroStatus == 'D' or instanceZeroStatus == 'Dr':
                        jobStatus = instanceZeroStatus
                  else:
                     jobStatus = 'Dr'
                  if jobStatus != 'WF':
                     sqlCommand = "UPDATE activeJobs SET jobStatus=%s,jobStage=%s,jobQueue=%s,executionHost=%s,timeRecorded=%s \
                                                   WHERE(siteDesignator=%s AND remoteJobId=%s)"
                     sqlParameters = (jobStatus,jobStage,
                                      jobQueue,executionHost,
                                      timeRecorded,
                                      siteDesignator,remoteJobId)
                     try:
                        self.myDatabase.update(sqlCommand,sqlParameters)
                     except Exception:
                        self.logger.log(logging.ERROR,getLogMessage("updateWFJobs:myDatabase.update(activeJobs) failed"))
                        self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))

         self.myDatabase.commit()
      finally:
         # release the connection even when an unexpected error escapes
         self.myDatabase.disconnect()


   @staticmethod
   def __textToBlob(text):
      try:
         blob = base64.b64encode(text).decode('utf-8')
      except UnicodeEncodeError:
         blob = base64.b64encode(text.encode('utf-8')).decode('utf-8')
      except TypeError:
         blob = base64.b64encode(text.encode('utf-8')).decode('utf-8')

      return(blob)


   @staticmethod
   def __blobToText(blob):
      try:
         text = base64.b64decode(blob.encode('utf-8'))
      except AttributeError:
         text = base64.b64decode(blob).decode('utf-8')

      return(text)


   def updateSite(self,
                  jsonObject):
      if self.myDatabase.connect(self.configData['mysqlDB']):
         siteDesignator = jsonObject['siteDesignator']
         timeRecorded = time.time()
         for jobState in jsonObject['jobStates']:
            try:
               remoteJobId   = jobState.get('jobId')
               jobStatus     = jobState.get('status')
               jobStage      = jobState.get('stage')
               jobQueue      = jobState.get('queue','?')
               executionHost = jobState.get('executionHost','?')
               tailFiles     = jobState.get('tailFiles',{})
#              if tailFiles:
#                 self.logger.log(logging.DEBUG,getLogMessage("%s" % (str(tailFiles))))

               sqlCommand = "UPDATE activeJobs SET jobStatus=%s,jobStage=%s,jobQueue=%s,executionHost=%s,timeRecorded=%s \
                                             WHERE(siteDesignator=%s AND remoteJobId=%s)"
               sqlParameters = (jobStatus,jobStage,
                                jobQueue,executionHost,
                                timeRecorded,
                                siteDesignator,remoteJobId)
               try:
                  self.myDatabase.update(sqlCommand,sqlParameters)
               except:
                  pass
               else:
                  for tailFile in tailFiles:
                     tailText = tailFiles[tailFile]
                     if tailText != '?':
                        nLines   = 0
                        tailTextDB = ""
                        sqlCommand = "SELECT nLines,tailText FROM fileTailings \
                                              WHERE(siteDesignator=%s AND remoteJobId=%s AND fileName=%s)"
                        sqlParameters = (siteDesignator,remoteJobId,tailFile)
                        try:
                           result = self.myDatabase.select(sqlCommand,sqlParameters)
                        except:
                           pass
                        else:
                           if len(result) == 1:
                              row = result[0]
                              nLines     = row[0]
                              tailBlobDB = row[1]
                              tailTextDB = self.__blobToText(tailBlobDB)

#                       self.logger.log(logging.DEBUG,getLogMessage("updateFileTail: %d lines of %s" % (nLines,tailFile)))
                        updatedTailTextLines  = []
                        if tailTextDB:
                           updatedTailTextLines += tailTextDB.split('\n')
                        if tailText:
                           updatedTailTextLines += tailText.split('\n')
                        updatedTailText = '\n'.join(updatedTailTextLines[-nLines:])
#
# Maximum blob length in MySQL is 65535 characters
# If tailText is longer than the maximum allowed it must be truncated
# Reserve 4 bytes for padding and account for length increasing with
# base64 encoding
#
# maxTextLength = floor((65535-4)*6/8) = 49148
#
                        updatedTailText = updatedTailText[-49148:]
                        updatedTailBlob = self.__textToBlob(updatedTailText)
#                       self.logger.log(logging.DEBUG,getLogMessage("updateFileTail: updatedTailText = %s" % (updatedTailText)))
                        self.logger.log(logging.DEBUG,getLogMessage("updateFileTail: updatedTailText has %d lines" % \
                                                                                       (len(updatedTailTextLines))))

                        sqlCommand = "UPDATE fileTailings SET timeUpdated=%s,tailText=%s \
                                                  WHERE(siteDesignator=%s AND remoteJobId=%s AND fileName=%s)"
                        sqlParameters = (timeRecorded,updatedTailBlob,
                                         siteDesignator,remoteJobId,
                                         tailFile)
                        try:
                           self.myDatabase.update(sqlCommand,sqlParameters)
                        except:
                           pass
            except:
               self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))

         self.myDatabase.commit()
         self.myDatabase.disconnect()

      self.updateWFJobs()
      self.updateActiveJobSite()


   def writeToSiteMonitor(self,
                          childin,
                          jsonMessage):
      try:
         childin.write((jsonMessage + '\n').encode('utf-8'))
         childin.flush()
      except Exception as e:
         if isinstance(e.args,tuple):
            if   e.args[0] == EPIPE:
               self.logger.log(logging.ERROR,getLogMessage("writeToSiteMonitor: Broken pipe"))
            elif e.args[0] == ECONNRESET:
               self.logger.log(logging.ERROR,getLogMessage("writeToSiteMonitor: Connection reset by peer"))
            else:
               self.logger.log(logging.ERROR,getLogMessage("Unexpected error in writeToSiteMonitor(%s)" % (jsonMessage[0:50])))
               self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         else:
            self.logger.log(logging.ERROR,getLogMessage("Unexpected error in writeToSiteMonitor(%s)" % (jsonMessage[0:50])))
            self.logger.log(logging.ERROR,getLogMessage(traceback.format_exc()))
         self.terminate()


   def monitorSite(self):
      exitStatus = 0

      if self.configure():
         configFilePath = os.path.join(self.configurationDirectory,self.infosConfigurationFile)
         self.infosInfo             = InfosInfo(configFilePath)
         self.monitorsInfo          = MonitorsInfo(self.infosInfo.getInfoPath('monitors'))
         self.remoteTunnelMonitor   = RemoteTunnelMonitor(self.tunnelListenURI)
         self.remoteIdentityManager = RemoteIdentityManager(self.identityListenURI)

         exitStatus = self.buildSSHCommand()

         self.logger.log(logging.INFO,getLogMessage("****************************************"))
         self.logger.log(logging.INFO,getLogMessage("* %s job monitor started *" % (self.siteDesignator)))
         self.logger.log(logging.INFO,getLogMessage("****************************************"))

         if self.sshCommand:
            if self.tunnelDesignator != "":
               self.remoteTunnelMonitor.incrementTunnelUse(self.tunnelDesignator)

            child = subprocess.Popen(self.sshCommand,bufsize=1,
                                     stdin=subprocess.PIPE,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     close_fds=True)
            self.siteMonitorPid = child.pid
            childin    = child.stdin
            childinFd  = childin.fileno()
            childout   = child.stdout
            childoutFd = childout.fileno()
            childerr   = child.stderr
            childerrFd = childerr.fileno()
            outEOF = False
            errEOF = False
            errData = []

            if self.isJobSitePending():
               self.addActiveJobSite(os.getpid())
               self.deletePendingJobSite()
            else:
               self.logger.log(logging.ERROR,getLogMessage("Activated job site %s not pending" % (self.siteDesignator)))

            newRemoteJobIds    = self.getNewRemoteJobIds()
            orphanRemoteJobIds = self.getOldRemainingRemoteJobIds()
            while True:
               readers = []
               if not outEOF:
                  readers.append(childoutFd)
               if not errEOF:
                  readers.append(childerrFd)
               writers = []
               if not outEOF or not errEOF:
                  if (len(newRemoteJobIds) > 0) or (len(orphanRemoteJobIds) > 0):
                     writers.append(childinFd)

               if outEOF and errEOF and not writers:
                  break

               try:
                  self.exitSystemCallOnInterrupt = True
                  readyReaders,readyWriters,readyExceptions = select.select(readers,writers,[],ACTIVITYUPDATEINTERVAL)
               except select.error as e:
                  if e.args[0] == EINTR:
                     readyReaders = []
                     readyWriters = []
                  else:
                     self.terminate()
               finally:
                  self.exitSystemCallOnInterrupt = False

               for readyReader in readyReaders:
                  if   readyReader == childoutFd:
                     jsonMessage = childout.readline().decode('utf-8')
                     if jsonMessage == "":
                        outEOF = True
                     else:
#                       self.logger.log(logging.INFO,getLogMessage("%d character message received from %s" % \
#                                                                     (len(jsonMessage),self.siteDesignator)))
                        while jsonMessage != "":
                           try:
                              jsonObject = json.loads(jsonMessage)
                           except ValueError:
                              self.logger.log(logging.ERROR,getLogMessage("JSON object %s could not be decoded" % (jsonMessage)))
                           else:
                              if jsonObject['messageType'] != 'pipeFlusher':
                                 self.logger.log(logging.DEBUG,getLogMessage("request = %s, message length = %d" % \
                                                                      (jsonObject['messageType'],len(jsonMessage))))
                                 requestStart = time.time()
                              if   jsonObject['messageType'] == 'siteUpdate':
                                 self.logger.log(logging.INFO,getLogMessage("%d/%d job status messages received from %s" % \
                                                                            (len(jsonObject['jobStates']), \
                                                                             jsonObject['nJobStates'], \
                                                                             self.siteDesignator)))
#                                self.logger.log(logging.DEBUG,getLogMessage("siteUpdate:\n%s" % (jsonObject)))
                                 self.updateSite(jsonObject)
                              elif jsonObject['messageType'] == 'pipeFlusher':
                                 break
                              else:
                                 self.logger.log(logging.ERROR,getLogMessage("Invalid message from site %s" % \
                                                                                        (self.siteDesignator)))
                              if jsonObject['messageType'] != 'pipeFlusher':
                                 requestTime = time.time()-requestStart
                                 self.logger.log(logging.DEBUG,getLogMessage("request = %s, processing time = %f" % \
                                                                            (jsonObject['messageType'],requestTime)))
                           jsonMessage = childout.readline().decode('utf-8')
                  elif readyReader == childerrFd:
                     errChunk = os.read(childerrFd,self.bufferSize).decode('utf-8')
                     if errChunk == '':
                        errEOF = True
                     errData.append(errChunk)

               for readyWriter in readyWriters:
                  if readyWriter == childinFd:
                     nJobsPushedToSite = 0
                     nNewRemoteJobIds = len(newRemoteJobIds)
                     if nNewRemoteJobIds > 0:
                        self.logger.log(logging.DEBUG,getLogMessage("%s - %d newRemoteJobIds" % \
                                                         (self.siteDesignator,nNewRemoteJobIds)))
                        while (len(newRemoteJobIds) > 0) and (nJobsPushedToSite < self.maximumJobsPushedToSite):
                           jsonObject = newRemoteJobIds.pop()
                           try:
                              jsonMessage = json.dumps(jsonObject)
                           except TypeError:
                              self.logger.log(logging.ERROR,getLogMessage("JSON object %s could not be encoded" % \
                                                                                                (str(jsonObject))))
                           else:
#                             self.logger.log(logging.DEBUG,getLogMessage("%s - newRemoteJobId %s" % \
#                                                    (self.siteDesignator,jsonObject['remoteJobId'])))
                              self.writeToSiteMonitor(childin,jsonMessage)
                              nJobsPushedToSite += 1

                     nOrphanRemoteJobIds = len(orphanRemoteJobIds)
                     if nOrphanRemoteJobIds > 0:
                        self.logger.log(logging.DEBUG,getLogMessage("%s - %d orphanRemoteJobIds" % \
                                                         (self.siteDesignator,nOrphanRemoteJobIds)))
                        while (len(orphanRemoteJobIds) > 0) and (nJobsPushedToSite < self.maximumJobsPushedToSite):
                           jsonObject = orphanRemoteJobIds.pop()
                           try:
                              jsonMessage = json.dumps(jsonObject)
                           except TypeError:
                              self.logger.log(logging.ERROR,getLogMessage("JSON object %s could not be encoded" % \
                                                                                                (str(jsonObject))))
                           else:
#                             self.logger.log(logging.DEBUG,getLogMessage("%s - orphanRemoteJobId %s" % \
#                                                       (self.siteDesignator,jsonObject['remoteJobId'])))
                              self.writeToSiteMonitor(childin,jsonMessage)
                              nJobsPushedToSite += 1

                     self.logger.log(logging.DEBUG,getLogMessage("%s - %d jobsPushedToSite, %d newJobIds, %d orphanJobIds" % \
                                        (self.siteDesignator,nJobsPushedToSite,len(newRemoteJobIds),len(orphanRemoteJobIds))))
                     jsonObject = {'messageType':'pipeFlusher'}
                     try:
                        jsonMessage = json.dumps(jsonObject)
                     except:
                        pass
                     else:
#                       log("Site pipe flushed")
                        self.writeToSiteMonitor(childin,jsonMessage)
                     time.sleep(5)

               if outEOF and errEOF and self.siteMonitorPid:
                  pid,exitStatus = os.waitpid(self.siteMonitorPid,0)
                  self.siteMonitorPid = 0
                  if exitStatus != 0:
                     if   os.WIFSIGNALED(exitStatus):
                        self.logger.log(logging.INFO,getLogMessage("%s failed w/ signal %d" % \
                                                    (self.sshCommand,os.WTERMSIG(exitStatus))))
                     else:
                        if os.WIFEXITED(exitStatus):
                           exitStatus = os.WEXITSTATUS(exitStatus)
                        self.logger.log(logging.INFO,getLogMessage("%s failed w/ exit code %d" % \
                                                                    (self.sshCommand,exitStatus)))
                     self.logger.log(logging.INFO,getLogMessage("%s" % ("".join(errData))))

                  if self.tunnelDesignator != "":
                     self.remoteTunnelMonitor.decrementTunnelUse(self.tunnelDesignator)

                  self.deleteActiveJobSite()
                  if exitStatus == 255:
                     self.addRestartJobSite()
               else:
                  if len(newRemoteJobIds) == 0:
                     newRemoteJobIds    = self.getNewRemoteJobIds()
                  if len(orphanRemoteJobIds) == 0:
                     orphanRemoteJobIds = self.getOrphanRemoteJobIds()
         else:
# noop monitors
            if self.isJobSitePending():
               self.addActiveJobSite(os.getpid())
               self.deletePendingJobSite()
            else:
               self.logger.log(logging.ERROR,getLogMessage("Activated job site %s not pending" % (self.siteDesignator)))

            now = time.time()
            while time.time()-now < NOOPINACTIVITYINTERVAL:
               newRemoteJobIds = self.getNewRemoteJobIds()
               if len(newRemoteJobIds) > 0:
                  now = time.time()
               del newRemoteJobIds
               time.sleep(NOOPCHECKINTERVAL)
            self.deleteActiveJobSite()

         self.logger.log(logging.INFO,getLogMessage("****************************************"))
         self.logger.log(logging.INFO,getLogMessage("* %s job monitor stopped *" % (self.siteDesignator)))
         self.logger.log(logging.INFO,getLogMessage("****************************************"))
      else:
         exitStatus = 1

      return(exitStatus)


if __name__ == '__main__':

#  Script entry point: expects exactly one argument, the job site name.
#  The logger is opened first so that a usage error can be reported.
   exitStatus = 0
   fdLogFile = openLogger(MONITORLOGLOCATION,MONITORLOGFILENAME)

   if len(sys.argv) != 2:
      APPLICATIONLOGGER.log(logging.ERROR,getLogMessage("job site name not specified"))
      exitStatus = 1
   else:
      daemonize(fdLogFile)

#     put the package bin directory at the front of the search path
      os.environ['PATH'] = os.pathsep.join([BINDIRECTORY,os.environ['PATH']])

      siteName = sys.argv[1]
      __monitorJobSite__ = MonitorJobSite(CONFIGURATIONDIRECTORY,
                                          MONITORCONFIGURATIONFILE,
                                          DAEMONSCONFIGURATIONFILE,
                                          INFOSCONFIGURATIONFILE,
                                          IDENTITYUSER,
                                          siteName)
      exitStatus = __monitorJobSite__.monitorSite()

   sys.exit(exitStatus)


