github.com/informationsea/shellflow@v0.1.3/shellflow_shelltask.go (about)

     1  package main
     2  
     3  import (
     4  	"fmt"
     5  	"os"
     6  	"path/filepath"
     7  	"regexp"
     8  	"sort"
     9  	"strconv"
    10  	"strings"
    11  
    12  	"github.com/informationsea/shellflow/flowscript"
    13  )
    14  
    15  type ShellTaskBuilder struct {
    16  	CurrentID           int
    17  	Tasks               []*ShellTask
    18  	MissingCreatorFiles flowscript.StringSet
    19  	WorkflowContent     string
    20  	workflowLogs        WorkflowLogArray
    21  	config              *Configuration
    22  }
    23  
    24  func NewShellTaskBuilder() (*ShellTaskBuilder, error) {
    25  	logs, err := CollectLogs(WorkflowLogDir)
    26  	if err != nil {
    27  		return nil, err
    28  	}
    29  
    30  	return &ShellTaskBuilder{
    31  		CurrentID:           0,
    32  		Tasks:               make([]*ShellTask, 0),
    33  		MissingCreatorFiles: flowscript.NewStringSet(),
    34  		workflowLogs:        logs,
    35  	}, nil
    36  }
    37  
    38  func (b *ShellTaskBuilder) CreateShellTask(lineNum int, line string) (*ShellTask, error) {
    39  	var formattedLine strings.Builder
    40  	dependentFiles := flowscript.NewStringSet()
    41  	creatingFiles := flowscript.NewStringSet()
    42  	conf, err := LoadConfiguration()
    43  	if err != nil {
    44  		return nil, fmt.Errorf("Cannot load configuration: %s", err.Error())
    45  	}
    46  
    47  	// extract dependent and creating files
    48  	for {
    49  		inputStart := strings.Index(line, "((")
    50  		outputStart := strings.Index(line, "[[")
    51  
    52  		if inputStart < 0 && outputStart < 0 {
    53  			formattedLine.WriteString(line)
    54  			break
    55  		}
    56  
    57  		var endStr string
    58  		var startPos int
    59  		if (outputStart < 0 && inputStart >= 0) || (inputStart >= 0 && inputStart < outputStart) {
    60  			endStr = "))"
    61  			startPos = inputStart
    62  		} else if (inputStart < 0 && outputStart >= 0) || (outputStart >= 0 && outputStart < inputStart) {
    63  			endStr = "]]"
    64  			startPos = outputStart
    65  		}
    66  
    67  		//fmt.Printf("startPos: %d / %s / %s\n", startPos, line, endStr)
    68  
    69  		formattedLine.WriteString(line[0:startPos])
    70  		line = line[startPos:]
    71  		endPos := strings.Index(line, endStr)
    72  		if endPos < 0 {
    73  			return nil, fmt.Errorf("Closing bracket is not found: %s", endStr)
    74  		}
    75  
    76  		targetStr := line[2:endPos]
    77  		formattedLine.WriteString(targetStr)
    78  		line = line[endPos+2:]
    79  
    80  		var parsedFiles []string
    81  		if strings.ContainsRune(targetStr, '*') || strings.ContainsRune(targetStr, '?') {
    82  			parsedFiles, err = filepath.Glob(targetStr)
    83  		} else {
    84  			parsedFiles = []string{targetStr}
    85  		}
    86  
    87  		switch endStr {
    88  		case "))":
    89  			for _, x := range parsedFiles {
    90  				dependentFiles.Add(x)
    91  			}
    92  		case "]]":
    93  			for _, x := range parsedFiles {
    94  				creatingFiles.Add(x)
    95  			}
    96  		}
    97  	}
    98  
    99  	// creating task dependency
   100  	skippable := true
   101  	dependentTasks := make(map[int]struct{})
   102  	missingCreatorFiles := flowscript.NewStringSet()
   103  	for _, v := range dependentFiles.Array() {
   104  		found := false
   105  		for i := len(b.Tasks) - 1; i >= 0; i-- {
   106  			task := b.Tasks[i]
   107  			if task.CreatingFiles.Contains(v) {
   108  				dependentTasks[task.ID] = struct{}{}
   109  				found = true
   110  				break
   111  			}
   112  		}
   113  		if !found {
   114  			missingCreatorFiles.Add(v)
   115  		}
   116  	}
   117  	b.MissingCreatorFiles.AddAll(missingCreatorFiles)
   118  	dependentTaskID := make([]int, 0)
   119  	for k := range dependentTasks {
   120  		if !b.Tasks[k-1].ShouldSkip {
   121  			skippable = false
   122  		}
   123  		dependentTaskID = append(dependentTaskID, k)
   124  	}
   125  	sort.Ints(dependentTaskID)
   126  
   127  	//fmt.Printf("skippable: %v : %s\n", skippable, formattedLine.String())
   128  	shellScript := formattedLine.String()
   129  
   130  	shouldSkip := false
   131  	var reuseLogPath *JobLog
   132  	if skippable {
   133  		cwd, err := os.Getwd()
   134  		if err != nil {
   135  			return nil, err
   136  		}
   137  		job := b.workflowLogs.SearchReusableJob(shellScript, cwd, dependentFiles, creatingFiles)
   138  		if job != nil { // found
   139  			shouldSkip = true
   140  			reuseLogPath = job
   141  		}
   142  	}
   143  
   144  	// check config
   145  	commandConf := CommandConfiguration{
   146  		RegExp:    "",
   147  		SGEOption: []string{},
   148  	}
   149  	//fmt.Printf("config: %d\n", len(conf.Command))
   150  	for _, v := range conf.Command {
   151  		r, err := regexp.Compile(v.RegExp)
   152  		if err != nil {
   153  			return nil, fmt.Errorf("Invalid regular expression in configuration: %s", err.Error())
   154  		}
   155  		//fmt.Printf("checking %s = %s\n", v.RegExp, shellScript)
   156  		if r.MatchString(shellScript) {
   157  			//fmt.Printf("Match\n")
   158  			commandConf = v
   159  			break
   160  		}
   161  	}
   162  
   163  	b.CurrentID++
   164  	task := ShellTask{
   165  		LineNum:              lineNum,
   166  		ShellScript:          formattedLine.String(),
   167  		ID:                   b.CurrentID,
   168  		DependentFiles:       dependentFiles,
   169  		CreatingFiles:        creatingFiles,
   170  		DependentTaskID:      dependentTaskID,
   171  		ShouldSkip:           shouldSkip,
   172  		ReuseLog:             reuseLogPath,
   173  		CommandConfiguration: commandConf,
   174  	}
   175  
   176  	b.Tasks = append(b.Tasks, &task)
   177  	return &task, nil
   178  }
   179  
   180  func (b *ShellTaskBuilder) CreateDag() string {
   181  	var builder strings.Builder
   182  	builder.WriteString("digraph shelltask {\n  node [shape=box];\n")
   183  	for _, v := range b.Tasks {
   184  		builder.WriteString(fmt.Sprintf("  task%d [label=%s];\n", v.ID, strconv.Quote(v.ShellScript)))
   185  	}
   186  
   187  	for i, v := range b.MissingCreatorFiles.Array() {
   188  		builder.WriteString(fmt.Sprintf("  input%d [label=%s, color=red];\n", i, strconv.Quote(v)))
   189  		for _, v2 := range b.Tasks {
   190  			if v2.DependentFiles.Contains(v) {
   191  				builder.WriteString(fmt.Sprintf("  input%d -> task%d;\n", i, v2.ID))
   192  			}
   193  		}
   194  	}
   195  
   196  	for _, v := range b.Tasks {
   197  		for _, x := range v.DependentTaskID {
   198  			files := v.DependentFiles.Intersect(b.Tasks[x-1].CreatingFiles)
   199  			for _, oneFile := range files.Array() {
   200  				builder.WriteString(fmt.Sprintf("  task%d -> task%d [label=%s];\n", x, v.ID, strconv.Quote(oneFile)))
   201  			}
   202  		}
   203  	}
   204  
   205  	allCreatedFiles := make(map[string]int)
   206  	allDependentFiles := make(map[string]int)
   207  
   208  	for _, v := range b.Tasks {
   209  		for _, one := range v.DependentFiles.Array() {
   210  			allDependentFiles[one] = v.ID
   211  		}
   212  		for _, one := range v.CreatingFiles.Array() {
   213  			allCreatedFiles[one] = v.ID
   214  		}
   215  	}
   216  
   217  	outputID := 0
   218  	for k, v := range allCreatedFiles {
   219  		_, ok := allDependentFiles[k]
   220  		if !ok {
   221  			outputID++
   222  			builder.WriteString(fmt.Sprintf("  output%d [label=%s, color=blue];\n", outputID, strconv.Quote(k)))
   223  			builder.WriteString(fmt.Sprintf("  task%d -> output%d;\n", v, outputID))
   224  		}
   225  	}
   226  
   227  	builder.WriteString("}\n")
   228  	return builder.String()
   229  }
   230  
   231  type ShellTask struct {
   232  	LineNum              int
   233  	ID                   int
   234  	ShellScript          string
   235  	DependentFiles       flowscript.StringSet
   236  	CreatingFiles        flowscript.StringSet
   237  	DependentTaskID      []int
   238  	ShouldSkip           bool
   239  	ReuseLog             *JobLog
   240  	CommandConfiguration CommandConfiguration
   241  }
   242  
   243  func (v *ShellTask) String() string {
   244  	return fmt.Sprintf("SellTask{\n  LineNum: %d, ID: %d,\n  ShellScript: %s,\n  DependentFiles: %s,\n  CreatingFiles: %s,\n  DependentTaskID: %d,\n  ShouldSkip: %v,\n  SGEOption: %s\n}", v.LineNum, v.ID, v.ShellScript, v.DependentFiles.Array(), v.CreatingFiles.Array(), v.DependentTaskID, v.ShouldSkip, v.CommandConfiguration.String())
   245  }