github.com/informationsea/shellflow@v0.1.3/shellflow_shelltask.go (about) 1 package main 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "regexp" 8 "sort" 9 "strconv" 10 "strings" 11 12 "github.com/informationsea/shellflow/flowscript" 13 ) 14 15 type ShellTaskBuilder struct { 16 CurrentID int 17 Tasks []*ShellTask 18 MissingCreatorFiles flowscript.StringSet 19 WorkflowContent string 20 workflowLogs WorkflowLogArray 21 config *Configuration 22 } 23 24 func NewShellTaskBuilder() (*ShellTaskBuilder, error) { 25 logs, err := CollectLogs(WorkflowLogDir) 26 if err != nil { 27 return nil, err 28 } 29 30 return &ShellTaskBuilder{ 31 CurrentID: 0, 32 Tasks: make([]*ShellTask, 0), 33 MissingCreatorFiles: flowscript.NewStringSet(), 34 workflowLogs: logs, 35 }, nil 36 } 37 38 func (b *ShellTaskBuilder) CreateShellTask(lineNum int, line string) (*ShellTask, error) { 39 var formattedLine strings.Builder 40 dependentFiles := flowscript.NewStringSet() 41 creatingFiles := flowscript.NewStringSet() 42 conf, err := LoadConfiguration() 43 if err != nil { 44 return nil, fmt.Errorf("Cannot load configuration: %s", err.Error()) 45 } 46 47 // extract dependent and creating files 48 for { 49 inputStart := strings.Index(line, "((") 50 outputStart := strings.Index(line, "[[") 51 52 if inputStart < 0 && outputStart < 0 { 53 formattedLine.WriteString(line) 54 break 55 } 56 57 var endStr string 58 var startPos int 59 if (outputStart < 0 && inputStart >= 0) || (inputStart >= 0 && inputStart < outputStart) { 60 endStr = "))" 61 startPos = inputStart 62 } else if (inputStart < 0 && outputStart >= 0) || (outputStart >= 0 && outputStart < inputStart) { 63 endStr = "]]" 64 startPos = outputStart 65 } 66 67 //fmt.Printf("startPos: %d / %s / %s\n", startPos, line, endStr) 68 69 formattedLine.WriteString(line[0:startPos]) 70 line = line[startPos:] 71 endPos := strings.Index(line, endStr) 72 if endPos < 0 { 73 return nil, fmt.Errorf("Closing bracket is not found: %s", endStr) 74 } 75 76 targetStr := line[2:endPos] 77 formattedLine.WriteString(targetStr) 78 line = line[endPos+2:] 79 80 var parsedFiles []string 81 if strings.ContainsRune(targetStr, '*') || strings.ContainsRune(targetStr, '?') { 82 parsedFiles, err = filepath.Glob(targetStr) 83 } else { 84 parsedFiles = []string{targetStr} 85 } 86 87 switch endStr { 88 case "))": 89 for _, x := range parsedFiles { 90 dependentFiles.Add(x) 91 } 92 case "]]": 93 for _, x := range parsedFiles { 94 creatingFiles.Add(x) 95 } 96 } 97 } 98 99 // creating task dependency 100 skippable := true 101 dependentTasks := make(map[int]struct{}) 102 missingCreatorFiles := flowscript.NewStringSet() 103 for _, v := range dependentFiles.Array() { 104 found := false 105 for i := len(b.Tasks) - 1; i >= 0; i-- { 106 task := b.Tasks[i] 107 if task.CreatingFiles.Contains(v) { 108 dependentTasks[task.ID] = struct{}{} 109 found = true 110 break 111 } 112 } 113 if !found { 114 missingCreatorFiles.Add(v) 115 } 116 } 117 b.MissingCreatorFiles.AddAll(missingCreatorFiles) 118 dependentTaskID := make([]int, 0) 119 for k := range dependentTasks { 120 if !b.Tasks[k-1].ShouldSkip { 121 skippable = false 122 } 123 dependentTaskID = append(dependentTaskID, k) 124 } 125 sort.Ints(dependentTaskID) 126 127 //fmt.Printf("skippable: %v : %s\n", skippable, formattedLine.String()) 128 shellScript := formattedLine.String() 129 130 shouldSkip := false 131 var reuseLogPath *JobLog 132 if skippable { 133 cwd, err := os.Getwd() 134 if err != nil { 135 return nil, err 136 } 137 job := b.workflowLogs.SearchReusableJob(shellScript, cwd, dependentFiles, creatingFiles) 138 if job != nil { // found 139 shouldSkip = true 140 reuseLogPath = job 141 } 142 } 143 144 // check config 145 commandConf := CommandConfiguration{ 146 RegExp: "", 147 SGEOption: []string{}, 148 } 149 //fmt.Printf("config: %d\n", len(conf.Command)) 150 for _, v := range conf.Command { 151 r, err := regexp.Compile(v.RegExp) 152 if err != nil { 153 return nil, fmt.Errorf("Invalid regular expression in configuration: %s", err.Error()) 154 } 155 //fmt.Printf("checking %s = %s\n", v.RegExp, shellScript) 156 if r.MatchString(shellScript) { 157 //fmt.Printf("Match\n") 158 commandConf = v 159 break 160 } 161 } 162 163 b.CurrentID++ 164 task := ShellTask{ 165 LineNum: lineNum, 166 ShellScript: formattedLine.String(), 167 ID: b.CurrentID, 168 DependentFiles: dependentFiles, 169 CreatingFiles: creatingFiles, 170 DependentTaskID: dependentTaskID, 171 ShouldSkip: shouldSkip, 172 ReuseLog: reuseLogPath, 173 CommandConfiguration: commandConf, 174 } 175 176 b.Tasks = append(b.Tasks, &task) 177 return &task, nil 178 } 179 180 func (b *ShellTaskBuilder) CreateDag() string { 181 var builder strings.Builder 182 builder.WriteString("digraph shelltask {\n node [shape=box];\n") 183 for _, v := range b.Tasks { 184 builder.WriteString(fmt.Sprintf(" task%d [label=%s];\n", v.ID, strconv.Quote(v.ShellScript))) 185 } 186 187 for i, v := range b.MissingCreatorFiles.Array() { 188 builder.WriteString(fmt.Sprintf(" input%d [label=%s, color=red];\n", i, strconv.Quote(v))) 189 for _, v2 := range b.Tasks { 190 if v2.DependentFiles.Contains(v) { 191 builder.WriteString(fmt.Sprintf(" input%d -> task%d;\n", i, v2.ID)) 192 } 193 } 194 } 195 196 for _, v := range b.Tasks { 197 for _, x := range v.DependentTaskID { 198 files := v.DependentFiles.Intersect(b.Tasks[x-1].CreatingFiles) 199 for _, oneFile := range files.Array() { 200 builder.WriteString(fmt.Sprintf(" task%d -> task%d [label=%s];\n", x, v.ID, strconv.Quote(oneFile))) 201 } 202 } 203 } 204 205 allCreatedFiles := make(map[string]int) 206 allDependentFiles := make(map[string]int) 207 208 for _, v := range b.Tasks { 209 for _, one := range v.DependentFiles.Array() { 210 allDependentFiles[one] = v.ID 211 } 212 for _, one := range v.CreatingFiles.Array() { 213 allCreatedFiles[one] = v.ID 214 } 215 } 216 217 outputID := 0 218 for k, v := range allCreatedFiles { 219 _, ok := allDependentFiles[k] 220 if !ok { 221 outputID++ 222 builder.WriteString(fmt.Sprintf(" output%d [label=%s, color=blue];\n", outputID, strconv.Quote(k))) 223 builder.WriteString(fmt.Sprintf(" task%d -> output%d;\n", v, outputID)) 224 } 225 } 226 227 builder.WriteString("}\n") 228 return builder.String() 229 } 230 231 type ShellTask struct { 232 LineNum int 233 ID int 234 ShellScript string 235 DependentFiles flowscript.StringSet 236 CreatingFiles flowscript.StringSet 237 DependentTaskID []int 238 ShouldSkip bool 239 ReuseLog *JobLog 240 CommandConfiguration CommandConfiguration 241 } 242 243 func (v *ShellTask) String() string { 244 return fmt.Sprintf("SellTask{\n LineNum: %d, ID: %d,\n ShellScript: %s,\n DependentFiles: %s,\n CreatingFiles: %s,\n DependentTaskID: %d,\n ShouldSkip: %v,\n SGEOption: %s\n}", v.LineNum, v.ID, v.ShellScript, v.DependentFiles.Array(), v.CreatingFiles.Array(), v.DependentTaskID, v.ShouldSkip, v.CommandConfiguration.String()) 245 }