github.com/informationsea/shellflow@v0.1.3/viewlog.go (about) 1 package main 2 3 import ( 4 "bufio" 5 "bytes" 6 "compress/gzip" 7 "encoding/json" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "path" 13 "reflect" 14 "sort" 15 "strconv" 16 "strings" 17 "time" 18 19 "github.com/informationsea/shellflow/flowscript" 20 ) 21 22 //go:generate stringer -type=WorkflowState 23 type WorkflowState int 24 25 const ( 26 WorkflowDone WorkflowState = iota 27 WorkflowRunning 28 WorkflowFailed 29 WorkflowUnknown 30 ) 31 32 //go:generate stringer -type=JobState 33 type JobState int 34 35 const ( 36 JobDone JobState = iota 37 JobRunning 38 JobFailed 39 JobPending 40 JobUnknown 41 ) 42 43 type WorkflowLog struct { 44 WorkflowLogRoot string 45 WorkflowScript string 46 ParameterFile string 47 StartDate time.Time 48 ChangedInput []string 49 JobLogs []*JobLog 50 } 51 52 func (v *WorkflowLog) String() string { 53 return fmt.Sprintf(`WorkflowLog{ 54 WorkflowScript: %s, 55 WorkflowLogRoot: %s, 56 ChangedInput: %s, 57 JobLogs: %s, 58 }`, strconv.Quote(v.WorkflowScript), strconv.Quote(v.WorkflowLogRoot), v.ChangedInput, v.JobLogs) 59 } 60 61 func (v *WorkflowLog) IsDone() bool { 62 for _, x := range v.JobLogs { 63 if !x.IsDone { 64 return false 65 } 66 } 67 return true 68 } 69 70 func (v *WorkflowLog) State() WorkflowState { 71 for _, x := range v.JobLogs { 72 if x.ExitCode > 0 { 73 return WorkflowFailed 74 } 75 } 76 77 for _, x := range v.JobLogs { 78 if x.ExitCode < 0 { 79 return WorkflowRunning 80 } 81 } 82 83 return WorkflowDone 84 } 85 86 func (v *WorkflowLog) IsChanged() bool { 87 for _, x := range v.JobLogs { 88 if x.IsAnyInputChanged || x.IsAnyOutputChanged { 89 return true 90 } 91 } 92 return false 93 } 94 95 func BoolToYesNo(x bool) string { 96 if x { 97 return "Yes" 98 } 99 return "No" 100 } 101 102 func (v *WorkflowLog) Summary(failedOnly bool) string { 103 var buf = bytes.NewBuffer(nil) 104 fmt.Fprintf(buf, "Workflow Script Path: %s\n", v.WorkflowScript) 105 fmt.Fprintf(buf, " Workflow Log Path: %s\n", v.WorkflowLogRoot) 106 fmt.Fprintf(buf, " Job Start: %s\n", v.StartDate.Format("2006/01/02 15:04:05")) 107 fmt.Fprintf(buf, " Changed Input Files:") 108 for _, x := range v.ChangedInput { 109 fmt.Fprintf(buf, " %s", x) 110 } 111 fmt.Fprint(buf, "\n") 112 113 for _, j := range v.JobLogs { 114 if j.State() != JobFailed && failedOnly { 115 continue 116 } 117 118 fmt.Fprintf(buf, "---- Job: %d ------------\n", j.ShellTask.ID) 119 fmt.Fprintf(buf, " State: %s\n", j.State().String()) 120 if j.ExitCode >= 0 { 121 fmt.Fprintf(buf, " Exit code: %d\n", j.ExitCode) 122 } 123 fmt.Fprintf(buf, " Reusable: %s\n", BoolToYesNo(j.IsReusable())) 124 fmt.Fprintf(buf, " Script: %s\n", j.ShellTask.ShellScript) 125 fmt.Fprintf(buf, " Input:") 126 for _, x := range j.ShellTask.DependentFiles.Array() { 127 fmt.Fprintf(buf, " %s", x) 128 } 129 fmt.Fprint(buf, "\n") 130 fmt.Fprintf(buf, " Output:") 131 for _, x := range j.ShellTask.CreatingFiles.Array() { 132 fmt.Fprintf(buf, " %s", x) 133 } 134 fmt.Fprint(buf, "\n") 135 136 fmt.Fprintf(buf, " Dependent Job IDs:") 137 for _, x := range j.ShellTask.DependentTaskID { 138 fmt.Fprintf(buf, " %d", x) 139 } 140 fmt.Fprint(buf, "\n") 141 142 if j.SgeTaskID != "" { 143 fmt.Fprintf(buf, " SGE Task ID: %s\n", strings.TrimSpace(j.SgeTaskID)) 144 } 145 146 fmt.Fprintf(buf, " Log directory: %s\n", j.JobLogRoot) 147 148 if j.State() == JobFailed { 149 logfile, err := os.Open(path.Join(j.JobLogRoot, "script.stderr")) 150 if err == nil { 151 fmt.Fprintf(buf, " - - - - - - Stderr - - - - - -\n") 152 scanner := bufio.NewScanner(logfile) 153 scanner.Split(bufio.ScanLines) 154 for i := 0; i < 3 && scanner.Scan(); i++ { 155 fmt.Fprintf(buf, " %s\n", scanner.Text()) 156 } 157 } 158 } 159 } 160 161 return string(buf.Bytes()) 162 } 163 164 type JobLog struct { 165 JobLogRoot string 166 InputFiles []FileLog 167 OutputFiles []FileLog 168 IsStarted bool 169 IsAnyInputChanged bool 170 IsDone bool 171 IsAnyOutputChanged bool 172 ExitCode int 173 ScriptExitCode int 174 ShellTask *ShellTask 175 SgeTaskID string 176 } 177 178 func (v *JobLog) String() string { 179 return fmt.Sprintf(` JobLog{ 180 JobLogRoot: %s, 181 InputFiles: %s, 182 OutputFiles: %s, 183 IsReusable: %v, 184 IsStarted: %v, 185 IsAnyInputChanged: %v, 186 IsDone: %v, 187 IsAnyOutputChanged: %v, 188 ExitCode: %d, 189 ScriptExitCode: %d, 190 ShellTask: %s 191 } 192 `, strconv.Quote(v.JobLogRoot), v.InputFiles, v.OutputFiles, v.IsReusable(), v.IsStarted, v.IsAnyInputChanged, v.IsDone, v.IsAnyOutputChanged, v.ExitCode, v.ScriptExitCode, v.ShellTask.String()) 193 } 194 195 func (v *JobLog) State() JobState { 196 if v.IsDone && v.ExitCode == 0 { 197 return JobDone 198 } else if v.IsDone { 199 return JobFailed 200 } else if v.IsStarted { 201 return JobRunning 202 } else { 203 return JobPending 204 } 205 } 206 207 func (v *JobLog) IsReusable() bool { 208 return v.IsDone && v.IsStarted && !v.IsAnyInputChanged && !v.IsAnyOutputChanged && v.ExitCode == 0 209 } 210 211 func LoadJsonFromFile(path string, obj interface{}) error { 212 file, err := os.Open(path) 213 if err != nil { 214 return err 215 } 216 defer file.Close() 217 decoder := json.NewDecoder(file) 218 return decoder.Decode(obj) 219 } 220 221 func CollectLogsForOneJob(jobRoot string, oneTask *ShellTask) (*JobLog, error) { 222 // check return code file 223 var jobDone = true 224 var exitCode = -1 225 var scriptExitCode = -1 226 var exitCodeFile io.Reader 227 var err error 228 exitCodeFile, err = os.Open(path.Join(jobRoot, "rc")) 229 if os.IsNotExist(err) { 230 followUpDone, err := FollowUpLocalSingle(jobRoot) 231 if err != nil { 232 return nil, fmt.Errorf("Failed to follow up %s : %s", jobRoot, err.Error()) 233 } 234 if !followUpDone { 235 followUpDone, err = FollowUpSge(jobRoot) 236 if err != nil { 237 return nil, fmt.Errorf("Failed to follow up %s : %s", jobRoot, err.Error()) 238 } 239 } 240 if !followUpDone { 241 exitCodeFile = bytes.NewReader([]byte("1000")) 242 } else { 243 exitCodeFile, err = os.Open(path.Join(jobRoot, "rc")) 244 } 245 } 246 247 if err == nil { 248 data, err := ioutil.ReadAll(exitCodeFile) 249 if err != nil { 250 return nil, err 251 } 252 exitCode64, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 32) 253 if err != nil { 254 return nil, err 255 } 256 exitCode = int(exitCode64) 257 scriptExitCode = exitCode 258 } else if os.IsNotExist(err) { 259 jobDone = false 260 } else { 261 jobDone = false 262 return nil, err 263 } 264 265 // check input files 266 var inputFiles []FileLog 267 var jobStarted bool 268 var anyInputChanged = false 269 err = LoadJsonFromFile(path.Join(jobRoot, "input.json"), &inputFiles) 270 if err == nil { 271 jobStarted = true 272 for _, v := range inputFiles { 273 //inputFiles[i].Content = nil 274 chg, err := v.IsChanged() 275 if err != nil { 276 return nil, err 277 } 278 if chg { 279 anyInputChanged = true 280 } 281 } 282 } else if os.IsNotExist(err) { 283 jobStarted = false 284 } else { 285 fmt.Fprintf(os.Stderr, "Failed to load %s\n", err.Error()) 286 jobStarted = false 287 } 288 289 // check output files 290 var outputFiles []FileLog 291 var anyOutputChanged = false 292 var outputFound = false 293 err = LoadJsonFromFile(path.Join(jobRoot, "output.json"), &outputFiles) 294 if err == nil { 295 for _, v := range outputFiles { 296 chg, err := v.IsChanged() 297 //outputFiles[i].Content = nil 298 if err != nil { 299 return nil, err 300 } 301 if chg { 302 anyOutputChanged = true 303 } 304 } 305 outputFound = true 306 } else if os.IsNotExist(err) { 307 outputFound = false 308 } else { 309 fmt.Fprintf(os.Stderr, "Failed to load %s\n", err.Error()) 310 outputFound = false 311 } 312 313 if jobDone && exitCode == 0 && !outputFound { 314 exitCode = 1000 315 } 316 317 // check SGE task id 318 var sgeTaskID string 319 sgeTaskIDFile, err := os.Open(path.Join(jobRoot, "sge-taskid.txt")) 320 if err == nil { 321 defer sgeTaskIDFile.Close() 322 data, err := ioutil.ReadAll(sgeTaskIDFile) 323 if err != nil { 324 return nil, err 325 } 326 sgeTaskID = string(data) 327 } else if os.IsNotExist(err) { 328 // ignore 329 } else { 330 return nil, err 331 } 332 333 return &JobLog{ 334 JobLogRoot: jobRoot, 335 InputFiles: inputFiles, 336 OutputFiles: outputFiles, 337 IsStarted: jobStarted, 338 IsAnyInputChanged: anyInputChanged, 339 IsDone: jobDone, 340 IsAnyOutputChanged: anyOutputChanged, 341 ExitCode: exitCode, 342 ScriptExitCode: scriptExitCode, 343 ShellTask: oneTask, 344 SgeTaskID: sgeTaskID, 345 }, nil 346 } 347 348 const workflowLogCacheFileName = "workflowLogCache.json.gz" 349 350 func CollectLogsForOneWorkFromCache(logdirPath string) (*WorkflowLog, error) { 351 cacheFile, err := os.OpenFile(path.Join(logdirPath, workflowLogCacheFileName), os.O_RDWR, 0644) 352 if err != nil { 353 return nil, err 354 } 355 defer cacheFile.Close() 356 cacheFileUngzip, err := gzip.NewReader(cacheFile) 357 if err != nil { 358 return nil, err 359 } 360 jsonDecoder := json.NewDecoder(cacheFileUngzip) 361 362 var workflowLog WorkflowLog 363 err = jsonDecoder.Decode(&workflowLog) 364 if err != nil { 365 return nil, err 366 } 367 368 var dependentFiles []FileLog 369 err = LoadJsonFromFile(path.Join(logdirPath, "input.json"), &dependentFiles) 370 if err != nil { 371 return nil, err 372 } 373 374 changedInput := make([]string, 0) 375 for _, oneDependent := range dependentFiles { 376 changed, err := oneDependent.IsChanged() 377 if err != nil { 378 return nil, err 379 } 380 if changed { 381 changedInput = append(changedInput, oneDependent.Relpath) 382 } 383 } 384 workflowLog.ChangedInput = changedInput 385 386 // re-check files 387 for i, job := range workflowLog.JobLogs { 388 if job.IsDone { 389 if !job.IsAnyInputChanged { 390 anyInputChanged := false 391 for _, v := range job.InputFiles { 392 changed, err := v.IsChanged() 393 if err != nil { 394 return nil, err 395 } 396 397 if changed { 398 anyInputChanged = true 399 break 400 } 401 } 402 job.IsAnyInputChanged = anyInputChanged 403 } 404 405 if !job.IsAnyOutputChanged { 406 anyOutputChanged := false 407 for _, v := range job.OutputFiles { 408 changed, err := v.IsChanged() 409 if err != nil { 410 return nil, err 411 } 412 413 if changed { 414 anyOutputChanged = true 415 break 416 } 417 } 418 job.IsAnyOutputChanged = anyOutputChanged 419 } 420 421 } else { 422 fmt.Fprintf(os.Stderr, "rescanning %s\n", job.JobLogRoot) 423 newJob, err := CollectLogsForOneJob(job.JobLogRoot, job.ShellTask) 424 if err != nil { 425 return nil, err 426 } 427 workflowLog.JobLogs[i] = newJob 428 } 429 } 430 431 cacheFile.Seek(0, os.SEEK_SET) 432 cacheFileGzip := gzip.NewWriter(cacheFile) 433 defer cacheFileGzip.Close() 434 435 cacheEncoder := json.NewEncoder(cacheFileGzip) 436 cacheEncoder.SetIndent("", " ") 437 err = cacheEncoder.Encode(workflowLog) 438 if err != nil { 439 return nil, fmt.Errorf("Failed to encode workflow log cache: %s", err.Error()) 440 } 441 442 return &workflowLog, nil 443 } 444 445 func CollectLogsForOneWork(logdirPath string) (*WorkflowLog, error) { 446 workflowLog, err := CollectLogsForOneWorkFromCache(logdirPath) 447 if os.IsNotExist(err) { 448 workflowLog, err = CollectLogsForOneWorkWithScan(logdirPath) 449 } 450 if err != nil { 451 return nil, fmt.Errorf("Error while scanning workflow log: %s", err.Error()) 452 } 453 return workflowLog, nil 454 } 455 456 func CollectLogsForOneWorkWithScan(logdirPath string) (*WorkflowLog, error) { 457 jobs := make([]*JobLog, 0) 458 459 var metadata WorkflowMetaData 460 err := LoadJsonFromFile(path.Join(logdirPath, "runtime.json"), &metadata) 461 if err != nil { 462 return nil, nil 463 } 464 465 var dependentFiles []FileLog 466 err = LoadJsonFromFile(path.Join(logdirPath, "input.json"), &dependentFiles) 467 if err != nil { 468 return nil, err 469 } 470 471 changedInput := make([]string, 0) 472 for _, oneDependent := range dependentFiles { 473 changed, err := oneDependent.IsChanged() 474 if err != nil { 475 return nil, err 476 } 477 if changed { 478 changedInput = append(changedInput, oneDependent.Relpath) 479 } 480 } 481 482 for _, oneTask := range metadata.Tasks { 483 jobRoot := path.Join(logdirPath, fmt.Sprintf("job%03d", oneTask.ID)) 484 oneJob, err := CollectLogsForOneJob(jobRoot, oneTask) 485 if err != nil { 486 return nil, err 487 } 488 489 jobs = append(jobs, oneJob) 490 } 491 492 result := &WorkflowLog{ 493 WorkflowScript: metadata.WorkflowPath, 494 WorkflowLogRoot: logdirPath, 495 ParameterFile: metadata.ParameterFile, 496 StartDate: metadata.Date, 497 JobLogs: jobs, 498 ChangedInput: changedInput, 499 } 500 501 cacheFile, err := os.OpenFile(path.Join(logdirPath, workflowLogCacheFileName), os.O_CREATE|os.O_WRONLY, 0644) 502 if err != nil { 503 return nil, fmt.Errorf("Cannot open cache file: %s", err.Error()) 504 } 505 defer cacheFile.Close() 506 cacheFileGzip := gzip.NewWriter(cacheFile) 507 defer cacheFileGzip.Close() 508 509 cacheEncoder := json.NewEncoder(cacheFileGzip) 510 cacheEncoder.SetIndent("", " ") 511 err = cacheEncoder.Encode(result) 512 if err != nil { 513 return nil, fmt.Errorf("Failed to encode workflow log cache: %s", err.Error()) 514 } 515 516 return result, nil 517 } 518 519 func CollectLogs(logdir string) (WorkflowLogArray, error) { 520 logdirFile, err := os.Open(logdir) 521 if os.IsNotExist(err) { 522 return WorkflowLogArray([]*WorkflowLog{}), nil 523 } 524 if err != nil { 525 return nil, fmt.Errorf("Cannot open log directory in collectlogs: %s", err.Error()) 526 } 527 528 workflowLogDirs, err := logdirFile.Readdir(0) 529 if err != nil { 530 return nil, fmt.Errorf("Cannot read log directory in collectlogs: %s", err.Error()) 531 } 532 533 workflowLogs := WorkflowLogArray(make([]*WorkflowLog, 0)) 534 535 for _, oneWorkflowLogDir := range workflowLogDirs { 536 if !oneWorkflowLogDir.IsDir() { 537 continue 538 } 539 logdirPath := path.Join(logdir, oneWorkflowLogDir.Name()) 540 541 one, err := CollectLogsForOneWork(logdirPath) 542 if err != nil { 543 return nil, err 544 } 545 if one != nil { 546 workflowLogs = append(workflowLogs, one) 547 } 548 } 549 550 sort.Sort(workflowLogs) 551 552 return workflowLogs, nil 553 } 554 555 type WorkflowLogArray []*WorkflowLog 556 557 func (v WorkflowLogArray) Len() int { return len(v) } 558 func (v WorkflowLogArray) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 559 func (v WorkflowLogArray) Less(i, j int) bool { return v[i].StartDate.Before(v[j].StartDate) } 560 561 func (v WorkflowLogArray) SearchReusableJob(shellscript string, workdir string, dependentFiles flowscript.StringSet, creatingFiles flowscript.StringSet) *JobLog { 562 563 for _, x := range v { 564 for _, y := range x.JobLogs { 565 if (!y.IsReusable()) || (y.ShellTask.ShellScript != shellscript) || (!reflect.DeepEqual(y.ShellTask.DependentFiles, dependentFiles)) { 566 continue 567 } 568 //fmt.Printf("found %s\n", y.JobLogRoot) 569 return y 570 } 571 } 572 573 return nil 574 } 575 576 const viewLogShowMax = 10 577 578 func ViewLog(showAll bool, failedOnly bool) error { 579 logs, err := CollectLogs(WorkflowLogDir) 580 if err != nil { 581 return err 582 } 583 584 fmt.Printf("%3s|%7s|Success|Failed|Running|Pending|File Changed|%-19s|Name\n", "#", "State", "Start Date") 585 586 showLogs := make([]int, 0) 587 588 count := 0 589 for i := len(logs) - 1; i >= 0; i-- { 590 if !showAll && count >= viewLogShowMax { 591 break 592 } 593 if !failedOnly || logs[i].State() == WorkflowFailed { 594 count++ 595 showLogs = append(showLogs, i) 596 } 597 } 598 599 sort.Sort(sort.IntSlice(showLogs)) 600 601 for _, i := range showLogs { 602 v := logs[i] 603 604 files := "No" 605 if v.IsChanged() { 606 files = "Yes" 607 } 608 609 successJobs := 0 610 failedJobs := 0 611 runningJobs := 0 612 notStartedJobs := 0 613 614 for _, x := range v.JobLogs { 615 switch x.State() { 616 case JobDone: 617 successJobs++ 618 case JobFailed: 619 failedJobs++ 620 case JobRunning: 621 runningJobs++ 622 case JobPending: 623 notStartedJobs++ 624 } 625 } 626 627 name := path.Base(v.WorkflowScript) 628 if v.ParameterFile != "" { 629 name += " " + path.Base(v.ParameterFile) 630 } 631 632 fmt.Printf("%3d|%7s|%7d|%6d|%7d|%7d|%12s|%10s|%s\n", i+1, v.State().String()[8:], successJobs, failedJobs, runningJobs, notStartedJobs, files, v.StartDate.Format("2006/01/02 15:04:05"), name) 633 } 634 return nil 635 } 636 637 func ViewLogDetail(args []string, failedOnly bool) error { 638 logs, err := CollectLogs(WorkflowLogDir) 639 if err != nil { 640 return err 641 } 642 643 fmt.Printf("len: %d\n", len(logs)) 644 first := true 645 for _, v := range args { 646 val, err := strconv.ParseInt(v, 10, 32) 647 if err == nil && val > 0 && val <= int64(len(logs)) { 648 if first { 649 first = false 650 } else { 651 fmt.Printf("===========================") 652 } 653 fmt.Printf("%s", logs[val-1].Summary(failedOnly)) 654 } else { 655 fmt.Fprintf(os.Stderr, "Bad Workflow Record Number: %s\n", v) 656 } 657 } 658 return nil 659 }