github.com/informationsea/shellflow@v0.1.3/viewlog.go (about)

     1  package main
     2  
     3  import (
     4  	"bufio"
     5  	"bytes"
     6  	"compress/gzip"
     7  	"encoding/json"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"os"
    12  	"path"
    13  	"reflect"
    14  	"sort"
    15  	"strconv"
    16  	"strings"
    17  	"time"
    18  
    19  	"github.com/informationsea/shellflow/flowscript"
    20  )
    21  
    22  //go:generate stringer -type=WorkflowState
    23  type WorkflowState int
    24  
    25  const (
    26  	WorkflowDone WorkflowState = iota
    27  	WorkflowRunning
    28  	WorkflowFailed
    29  	WorkflowUnknown
    30  )
    31  
    32  //go:generate stringer -type=JobState
    33  type JobState int
    34  
    35  const (
    36  	JobDone JobState = iota
    37  	JobRunning
    38  	JobFailed
    39  	JobPending
    40  	JobUnknown
    41  )
    42  
    43  type WorkflowLog struct {
    44  	WorkflowLogRoot string
    45  	WorkflowScript  string
    46  	ParameterFile   string
    47  	StartDate       time.Time
    48  	ChangedInput    []string
    49  	JobLogs         []*JobLog
    50  }
    51  
    52  func (v *WorkflowLog) String() string {
    53  	return fmt.Sprintf(`WorkflowLog{
    54    WorkflowScript: %s,
    55    WorkflowLogRoot: %s,
    56    ChangedInput: %s,
    57    JobLogs: %s,
    58  }`, strconv.Quote(v.WorkflowScript), strconv.Quote(v.WorkflowLogRoot), v.ChangedInput, v.JobLogs)
    59  }
    60  
    61  func (v *WorkflowLog) IsDone() bool {
    62  	for _, x := range v.JobLogs {
    63  		if !x.IsDone {
    64  			return false
    65  		}
    66  	}
    67  	return true
    68  }
    69  
    70  func (v *WorkflowLog) State() WorkflowState {
    71  	for _, x := range v.JobLogs {
    72  		if x.ExitCode > 0 {
    73  			return WorkflowFailed
    74  		}
    75  	}
    76  
    77  	for _, x := range v.JobLogs {
    78  		if x.ExitCode < 0 {
    79  			return WorkflowRunning
    80  		}
    81  	}
    82  
    83  	return WorkflowDone
    84  }
    85  
    86  func (v *WorkflowLog) IsChanged() bool {
    87  	for _, x := range v.JobLogs {
    88  		if x.IsAnyInputChanged || x.IsAnyOutputChanged {
    89  			return true
    90  		}
    91  	}
    92  	return false
    93  }
    94  
    95  func BoolToYesNo(x bool) string {
    96  	if x {
    97  		return "Yes"
    98  	}
    99  	return "No"
   100  }
   101  
   102  func (v *WorkflowLog) Summary(failedOnly bool) string {
   103  	var buf = bytes.NewBuffer(nil)
   104  	fmt.Fprintf(buf, "Workflow Script Path: %s\n", v.WorkflowScript)
   105  	fmt.Fprintf(buf, "   Workflow Log Path: %s\n", v.WorkflowLogRoot)
   106  	fmt.Fprintf(buf, "           Job Start: %s\n", v.StartDate.Format("2006/01/02 15:04:05"))
   107  	fmt.Fprintf(buf, " Changed Input Files:")
   108  	for _, x := range v.ChangedInput {
   109  		fmt.Fprintf(buf, " %s", x)
   110  	}
   111  	fmt.Fprint(buf, "\n")
   112  
   113  	for _, j := range v.JobLogs {
   114  		if j.State() != JobFailed && failedOnly {
   115  			continue
   116  		}
   117  
   118  		fmt.Fprintf(buf, "---- Job: %d ------------\n", j.ShellTask.ID)
   119  		fmt.Fprintf(buf, "             State: %s\n", j.State().String())
   120  		if j.ExitCode >= 0 {
   121  			fmt.Fprintf(buf, "         Exit code: %d\n", j.ExitCode)
   122  		}
   123  		fmt.Fprintf(buf, "          Reusable: %s\n", BoolToYesNo(j.IsReusable()))
   124  		fmt.Fprintf(buf, "            Script: %s\n", j.ShellTask.ShellScript)
   125  		fmt.Fprintf(buf, "             Input:")
   126  		for _, x := range j.ShellTask.DependentFiles.Array() {
   127  			fmt.Fprintf(buf, " %s", x)
   128  		}
   129  		fmt.Fprint(buf, "\n")
   130  		fmt.Fprintf(buf, "            Output:")
   131  		for _, x := range j.ShellTask.CreatingFiles.Array() {
   132  			fmt.Fprintf(buf, " %s", x)
   133  		}
   134  		fmt.Fprint(buf, "\n")
   135  
   136  		fmt.Fprintf(buf, " Dependent Job IDs:")
   137  		for _, x := range j.ShellTask.DependentTaskID {
   138  			fmt.Fprintf(buf, " %d", x)
   139  		}
   140  		fmt.Fprint(buf, "\n")
   141  
   142  		if j.SgeTaskID != "" {
   143  			fmt.Fprintf(buf, "       SGE Task ID: %s\n", strings.TrimSpace(j.SgeTaskID))
   144  		}
   145  
   146  		fmt.Fprintf(buf, "     Log directory: %s\n", j.JobLogRoot)
   147  
   148  		if j.State() == JobFailed {
   149  			logfile, err := os.Open(path.Join(j.JobLogRoot, "script.stderr"))
   150  			if err == nil {
   151  				fmt.Fprintf(buf, "  - - - - - - Stderr - - - - - -\n")
   152  				scanner := bufio.NewScanner(logfile)
   153  				scanner.Split(bufio.ScanLines)
   154  				for i := 0; i < 3 && scanner.Scan(); i++ {
   155  					fmt.Fprintf(buf, "  %s\n", scanner.Text())
   156  				}
   157  			}
   158  		}
   159  	}
   160  
   161  	return string(buf.Bytes())
   162  }
   163  
   164  type JobLog struct {
   165  	JobLogRoot         string
   166  	InputFiles         []FileLog
   167  	OutputFiles        []FileLog
   168  	IsStarted          bool
   169  	IsAnyInputChanged  bool
   170  	IsDone             bool
   171  	IsAnyOutputChanged bool
   172  	ExitCode           int
   173  	ScriptExitCode     int
   174  	ShellTask          *ShellTask
   175  	SgeTaskID          string
   176  }
   177  
   178  func (v *JobLog) String() string {
   179  	return fmt.Sprintf(`  JobLog{
   180      JobLogRoot:         %s,
   181      InputFiles:         %s,
   182      OutputFiles:        %s,
   183      IsReusable:         %v,
   184      IsStarted:          %v,
   185      IsAnyInputChanged:  %v,
   186      IsDone:             %v,
   187      IsAnyOutputChanged: %v,
   188      ExitCode:           %d,
   189      ScriptExitCode:     %d,
   190      ShellTask:          %s
   191    }
   192  `, strconv.Quote(v.JobLogRoot), v.InputFiles, v.OutputFiles, v.IsReusable(), v.IsStarted, v.IsAnyInputChanged, v.IsDone, v.IsAnyOutputChanged, v.ExitCode, v.ScriptExitCode, v.ShellTask.String())
   193  }
   194  
   195  func (v *JobLog) State() JobState {
   196  	if v.IsDone && v.ExitCode == 0 {
   197  		return JobDone
   198  	} else if v.IsDone {
   199  		return JobFailed
   200  	} else if v.IsStarted {
   201  		return JobRunning
   202  	} else {
   203  		return JobPending
   204  	}
   205  }
   206  
   207  func (v *JobLog) IsReusable() bool {
   208  	return v.IsDone && v.IsStarted && !v.IsAnyInputChanged && !v.IsAnyOutputChanged && v.ExitCode == 0
   209  }
   210  
   211  func LoadJsonFromFile(path string, obj interface{}) error {
   212  	file, err := os.Open(path)
   213  	if err != nil {
   214  		return err
   215  	}
   216  	defer file.Close()
   217  	decoder := json.NewDecoder(file)
   218  	return decoder.Decode(obj)
   219  }
   220  
   221  func CollectLogsForOneJob(jobRoot string, oneTask *ShellTask) (*JobLog, error) {
   222  	// check return code file
   223  	var jobDone = true
   224  	var exitCode = -1
   225  	var scriptExitCode = -1
   226  	var exitCodeFile io.Reader
   227  	var err error
   228  	exitCodeFile, err = os.Open(path.Join(jobRoot, "rc"))
   229  	if os.IsNotExist(err) {
   230  		followUpDone, err := FollowUpLocalSingle(jobRoot)
   231  		if err != nil {
   232  			return nil, fmt.Errorf("Failed to follow up %s : %s", jobRoot, err.Error())
   233  		}
   234  		if !followUpDone {
   235  			followUpDone, err = FollowUpSge(jobRoot)
   236  			if err != nil {
   237  				return nil, fmt.Errorf("Failed to follow up %s : %s", jobRoot, err.Error())
   238  			}
   239  		}
   240  		if !followUpDone {
   241  			exitCodeFile = bytes.NewReader([]byte("1000"))
   242  		} else {
   243  			exitCodeFile, err = os.Open(path.Join(jobRoot, "rc"))
   244  		}
   245  	}
   246  
   247  	if err == nil {
   248  		data, err := ioutil.ReadAll(exitCodeFile)
   249  		if err != nil {
   250  			return nil, err
   251  		}
   252  		exitCode64, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 32)
   253  		if err != nil {
   254  			return nil, err
   255  		}
   256  		exitCode = int(exitCode64)
   257  		scriptExitCode = exitCode
   258  	} else if os.IsNotExist(err) {
   259  		jobDone = false
   260  	} else {
   261  		jobDone = false
   262  		return nil, err
   263  	}
   264  
   265  	// check input files
   266  	var inputFiles []FileLog
   267  	var jobStarted bool
   268  	var anyInputChanged = false
   269  	err = LoadJsonFromFile(path.Join(jobRoot, "input.json"), &inputFiles)
   270  	if err == nil {
   271  		jobStarted = true
   272  		for _, v := range inputFiles {
   273  			//inputFiles[i].Content = nil
   274  			chg, err := v.IsChanged()
   275  			if err != nil {
   276  				return nil, err
   277  			}
   278  			if chg {
   279  				anyInputChanged = true
   280  			}
   281  		}
   282  	} else if os.IsNotExist(err) {
   283  		jobStarted = false
   284  	} else {
   285  		fmt.Fprintf(os.Stderr, "Failed to load %s\n", err.Error())
   286  		jobStarted = false
   287  	}
   288  
   289  	// check output files
   290  	var outputFiles []FileLog
   291  	var anyOutputChanged = false
   292  	var outputFound = false
   293  	err = LoadJsonFromFile(path.Join(jobRoot, "output.json"), &outputFiles)
   294  	if err == nil {
   295  		for _, v := range outputFiles {
   296  			chg, err := v.IsChanged()
   297  			//outputFiles[i].Content = nil
   298  			if err != nil {
   299  				return nil, err
   300  			}
   301  			if chg {
   302  				anyOutputChanged = true
   303  			}
   304  		}
   305  		outputFound = true
   306  	} else if os.IsNotExist(err) {
   307  		outputFound = false
   308  	} else {
   309  		fmt.Fprintf(os.Stderr, "Failed to load %s\n", err.Error())
   310  		outputFound = false
   311  	}
   312  
   313  	if jobDone && exitCode == 0 && !outputFound {
   314  		exitCode = 1000
   315  	}
   316  
   317  	// check SGE task id
   318  	var sgeTaskID string
   319  	sgeTaskIDFile, err := os.Open(path.Join(jobRoot, "sge-taskid.txt"))
   320  	if err == nil {
   321  		defer sgeTaskIDFile.Close()
   322  		data, err := ioutil.ReadAll(sgeTaskIDFile)
   323  		if err != nil {
   324  			return nil, err
   325  		}
   326  		sgeTaskID = string(data)
   327  	} else if os.IsNotExist(err) {
   328  		// ignore
   329  	} else {
   330  		return nil, err
   331  	}
   332  
   333  	return &JobLog{
   334  		JobLogRoot:         jobRoot,
   335  		InputFiles:         inputFiles,
   336  		OutputFiles:        outputFiles,
   337  		IsStarted:          jobStarted,
   338  		IsAnyInputChanged:  anyInputChanged,
   339  		IsDone:             jobDone,
   340  		IsAnyOutputChanged: anyOutputChanged,
   341  		ExitCode:           exitCode,
   342  		ScriptExitCode:     scriptExitCode,
   343  		ShellTask:          oneTask,
   344  		SgeTaskID:          sgeTaskID,
   345  	}, nil
   346  }
   347  
   348  const workflowLogCacheFileName = "workflowLogCache.json.gz"
   349  
   350  func CollectLogsForOneWorkFromCache(logdirPath string) (*WorkflowLog, error) {
   351  	cacheFile, err := os.OpenFile(path.Join(logdirPath, workflowLogCacheFileName), os.O_RDWR, 0644)
   352  	if err != nil {
   353  		return nil, err
   354  	}
   355  	defer cacheFile.Close()
   356  	cacheFileUngzip, err := gzip.NewReader(cacheFile)
   357  	if err != nil {
   358  		return nil, err
   359  	}
   360  	jsonDecoder := json.NewDecoder(cacheFileUngzip)
   361  
   362  	var workflowLog WorkflowLog
   363  	err = jsonDecoder.Decode(&workflowLog)
   364  	if err != nil {
   365  		return nil, err
   366  	}
   367  
   368  	var dependentFiles []FileLog
   369  	err = LoadJsonFromFile(path.Join(logdirPath, "input.json"), &dependentFiles)
   370  	if err != nil {
   371  		return nil, err
   372  	}
   373  
   374  	changedInput := make([]string, 0)
   375  	for _, oneDependent := range dependentFiles {
   376  		changed, err := oneDependent.IsChanged()
   377  		if err != nil {
   378  			return nil, err
   379  		}
   380  		if changed {
   381  			changedInput = append(changedInput, oneDependent.Relpath)
   382  		}
   383  	}
   384  	workflowLog.ChangedInput = changedInput
   385  
   386  	// re-check files
   387  	for i, job := range workflowLog.JobLogs {
   388  		if job.IsDone {
   389  			if !job.IsAnyInputChanged {
   390  				anyInputChanged := false
   391  				for _, v := range job.InputFiles {
   392  					changed, err := v.IsChanged()
   393  					if err != nil {
   394  						return nil, err
   395  					}
   396  
   397  					if changed {
   398  						anyInputChanged = true
   399  						break
   400  					}
   401  				}
   402  				job.IsAnyInputChanged = anyInputChanged
   403  			}
   404  
   405  			if !job.IsAnyOutputChanged {
   406  				anyOutputChanged := false
   407  				for _, v := range job.OutputFiles {
   408  					changed, err := v.IsChanged()
   409  					if err != nil {
   410  						return nil, err
   411  					}
   412  
   413  					if changed {
   414  						anyOutputChanged = true
   415  						break
   416  					}
   417  				}
   418  				job.IsAnyOutputChanged = anyOutputChanged
   419  			}
   420  
   421  		} else {
   422  			fmt.Fprintf(os.Stderr, "rescanning %s\n", job.JobLogRoot)
   423  			newJob, err := CollectLogsForOneJob(job.JobLogRoot, job.ShellTask)
   424  			if err != nil {
   425  				return nil, err
   426  			}
   427  			workflowLog.JobLogs[i] = newJob
   428  		}
   429  	}
   430  
   431  	cacheFile.Seek(0, os.SEEK_SET)
   432  	cacheFileGzip := gzip.NewWriter(cacheFile)
   433  	defer cacheFileGzip.Close()
   434  
   435  	cacheEncoder := json.NewEncoder(cacheFileGzip)
   436  	cacheEncoder.SetIndent("", "  ")
   437  	err = cacheEncoder.Encode(workflowLog)
   438  	if err != nil {
   439  		return nil, fmt.Errorf("Failed to encode workflow log cache: %s", err.Error())
   440  	}
   441  
   442  	return &workflowLog, nil
   443  }
   444  
   445  func CollectLogsForOneWork(logdirPath string) (*WorkflowLog, error) {
   446  	workflowLog, err := CollectLogsForOneWorkFromCache(logdirPath)
   447  	if os.IsNotExist(err) {
   448  		workflowLog, err = CollectLogsForOneWorkWithScan(logdirPath)
   449  	}
   450  	if err != nil {
   451  		return nil, fmt.Errorf("Error while scanning workflow log: %s", err.Error())
   452  	}
   453  	return workflowLog, nil
   454  }
   455  
   456  func CollectLogsForOneWorkWithScan(logdirPath string) (*WorkflowLog, error) {
   457  	jobs := make([]*JobLog, 0)
   458  
   459  	var metadata WorkflowMetaData
   460  	err := LoadJsonFromFile(path.Join(logdirPath, "runtime.json"), &metadata)
   461  	if err != nil {
   462  		return nil, nil
   463  	}
   464  
   465  	var dependentFiles []FileLog
   466  	err = LoadJsonFromFile(path.Join(logdirPath, "input.json"), &dependentFiles)
   467  	if err != nil {
   468  		return nil, err
   469  	}
   470  
   471  	changedInput := make([]string, 0)
   472  	for _, oneDependent := range dependentFiles {
   473  		changed, err := oneDependent.IsChanged()
   474  		if err != nil {
   475  			return nil, err
   476  		}
   477  		if changed {
   478  			changedInput = append(changedInput, oneDependent.Relpath)
   479  		}
   480  	}
   481  
   482  	for _, oneTask := range metadata.Tasks {
   483  		jobRoot := path.Join(logdirPath, fmt.Sprintf("job%03d", oneTask.ID))
   484  		oneJob, err := CollectLogsForOneJob(jobRoot, oneTask)
   485  		if err != nil {
   486  			return nil, err
   487  		}
   488  
   489  		jobs = append(jobs, oneJob)
   490  	}
   491  
   492  	result := &WorkflowLog{
   493  		WorkflowScript:  metadata.WorkflowPath,
   494  		WorkflowLogRoot: logdirPath,
   495  		ParameterFile:   metadata.ParameterFile,
   496  		StartDate:       metadata.Date,
   497  		JobLogs:         jobs,
   498  		ChangedInput:    changedInput,
   499  	}
   500  
   501  	cacheFile, err := os.OpenFile(path.Join(logdirPath, workflowLogCacheFileName), os.O_CREATE|os.O_WRONLY, 0644)
   502  	if err != nil {
   503  		return nil, fmt.Errorf("Cannot open cache file: %s", err.Error())
   504  	}
   505  	defer cacheFile.Close()
   506  	cacheFileGzip := gzip.NewWriter(cacheFile)
   507  	defer cacheFileGzip.Close()
   508  
   509  	cacheEncoder := json.NewEncoder(cacheFileGzip)
   510  	cacheEncoder.SetIndent("", "  ")
   511  	err = cacheEncoder.Encode(result)
   512  	if err != nil {
   513  		return nil, fmt.Errorf("Failed to encode workflow log cache: %s", err.Error())
   514  	}
   515  
   516  	return result, nil
   517  }
   518  
   519  func CollectLogs(logdir string) (WorkflowLogArray, error) {
   520  	logdirFile, err := os.Open(logdir)
   521  	if os.IsNotExist(err) {
   522  		return WorkflowLogArray([]*WorkflowLog{}), nil
   523  	}
   524  	if err != nil {
   525  		return nil, fmt.Errorf("Cannot open log directory in collectlogs: %s", err.Error())
   526  	}
   527  
   528  	workflowLogDirs, err := logdirFile.Readdir(0)
   529  	if err != nil {
   530  		return nil, fmt.Errorf("Cannot read log directory in collectlogs: %s", err.Error())
   531  	}
   532  
   533  	workflowLogs := WorkflowLogArray(make([]*WorkflowLog, 0))
   534  
   535  	for _, oneWorkflowLogDir := range workflowLogDirs {
   536  		if !oneWorkflowLogDir.IsDir() {
   537  			continue
   538  		}
   539  		logdirPath := path.Join(logdir, oneWorkflowLogDir.Name())
   540  
   541  		one, err := CollectLogsForOneWork(logdirPath)
   542  		if err != nil {
   543  			return nil, err
   544  		}
   545  		if one != nil {
   546  			workflowLogs = append(workflowLogs, one)
   547  		}
   548  	}
   549  
   550  	sort.Sort(workflowLogs)
   551  
   552  	return workflowLogs, nil
   553  }
   554  
   555  type WorkflowLogArray []*WorkflowLog
   556  
   557  func (v WorkflowLogArray) Len() int           { return len(v) }
   558  func (v WorkflowLogArray) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
   559  func (v WorkflowLogArray) Less(i, j int) bool { return v[i].StartDate.Before(v[j].StartDate) }
   560  
   561  func (v WorkflowLogArray) SearchReusableJob(shellscript string, workdir string, dependentFiles flowscript.StringSet, creatingFiles flowscript.StringSet) *JobLog {
   562  
   563  	for _, x := range v {
   564  		for _, y := range x.JobLogs {
   565  			if (!y.IsReusable()) || (y.ShellTask.ShellScript != shellscript) || (!reflect.DeepEqual(y.ShellTask.DependentFiles, dependentFiles)) {
   566  				continue
   567  			}
   568  			//fmt.Printf("found %s\n", y.JobLogRoot)
   569  			return y
   570  		}
   571  	}
   572  
   573  	return nil
   574  }
   575  
   576  const viewLogShowMax = 10
   577  
   578  func ViewLog(showAll bool, failedOnly bool) error {
   579  	logs, err := CollectLogs(WorkflowLogDir)
   580  	if err != nil {
   581  		return err
   582  	}
   583  
   584  	fmt.Printf("%3s|%7s|Success|Failed|Running|Pending|File Changed|%-19s|Name\n", "#", "State", "Start Date")
   585  
   586  	showLogs := make([]int, 0)
   587  
   588  	count := 0
   589  	for i := len(logs) - 1; i >= 0; i-- {
   590  		if !showAll && count >= viewLogShowMax {
   591  			break
   592  		}
   593  		if !failedOnly || logs[i].State() == WorkflowFailed {
   594  			count++
   595  			showLogs = append(showLogs, i)
   596  		}
   597  	}
   598  
   599  	sort.Sort(sort.IntSlice(showLogs))
   600  
   601  	for _, i := range showLogs {
   602  		v := logs[i]
   603  
   604  		files := "No"
   605  		if v.IsChanged() {
   606  			files = "Yes"
   607  		}
   608  
   609  		successJobs := 0
   610  		failedJobs := 0
   611  		runningJobs := 0
   612  		notStartedJobs := 0
   613  
   614  		for _, x := range v.JobLogs {
   615  			switch x.State() {
   616  			case JobDone:
   617  				successJobs++
   618  			case JobFailed:
   619  				failedJobs++
   620  			case JobRunning:
   621  				runningJobs++
   622  			case JobPending:
   623  				notStartedJobs++
   624  			}
   625  		}
   626  
   627  		name := path.Base(v.WorkflowScript)
   628  		if v.ParameterFile != "" {
   629  			name += " " + path.Base(v.ParameterFile)
   630  		}
   631  
   632  		fmt.Printf("%3d|%7s|%7d|%6d|%7d|%7d|%12s|%10s|%s\n", i+1, v.State().String()[8:], successJobs, failedJobs, runningJobs, notStartedJobs, files, v.StartDate.Format("2006/01/02 15:04:05"), name)
   633  	}
   634  	return nil
   635  }
   636  
   637  func ViewLogDetail(args []string, failedOnly bool) error {
   638  	logs, err := CollectLogs(WorkflowLogDir)
   639  	if err != nil {
   640  		return err
   641  	}
   642  
   643  	fmt.Printf("len: %d\n", len(logs))
   644  	first := true
   645  	for _, v := range args {
   646  		val, err := strconv.ParseInt(v, 10, 32)
   647  		if err == nil && val > 0 && val <= int64(len(logs)) {
   648  			if first {
   649  				first = false
   650  			} else {
   651  				fmt.Printf("===========================")
   652  			}
   653  			fmt.Printf("%s", logs[val-1].Summary(failedOnly))
   654  		} else {
   655  			fmt.Fprintf(os.Stderr, "Bad Workflow Record Number: %s\n", v)
   656  		}
   657  	}
   658  	return nil
   659  }