github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/process_dag.go (about) 1 package processor 2 3 import ( 4 "encoding/json" 5 "fmt" 6 "regexp" 7 "strings" 8 "sync" 9 "time" 10 11 "github.com/Jeffail/benthos/v3/internal/docs" 12 "github.com/Jeffail/benthos/v3/internal/interop" 13 "github.com/Jeffail/benthos/v3/internal/tracing" 14 "github.com/Jeffail/benthos/v3/lib/log" 15 "github.com/Jeffail/benthos/v3/lib/metrics" 16 "github.com/Jeffail/benthos/v3/lib/types" 17 "github.com/quipo/dependencysolver" 18 ) 19 20 //------------------------------------------------------------------------------ 21 22 func init() { 23 Constructors[TypeProcessDAG] = TypeSpec{ 24 constructor: NewProcessDAG, 25 Summary: ` 26 A processor that manages a map of ` + "`process_map`" + ` processors and 27 calculates a Directed Acyclic Graph (DAG) of their dependencies by referring to 28 their postmap targets for provided fields and their premap targets for required 29 fields.`, 30 Status: docs.StatusDeprecated, 31 Description: ` 32 ## Alternatives 33 34 All functionality of this processor has been superseded by the 35 [workflow](/docs/components/processors/workflow) processor. 36 37 The names of workflow stages may only contain alphanumeric, underscore and dash 38 characters (they must match the regular expression ` + "`[a-zA-Z0-9_-]+`" + `). 39 40 The DAG is then used to execute the children in the necessary order with the 41 maximum parallelism possible. You can read more about workflows in Benthos 42 [in this document](/docs/configuration/workflows). 43 44 The field ` + "`dependencies`" + ` is an optional array of fields that a child 45 depends on. This is useful for when fields are required but don't appear within 46 a premap such as those used in conditions. 47 48 This processor is extremely useful for performing a complex mesh of enrichments 49 where network requests mean we desire maximum parallelism across those 50 enrichments.`, 51 Footnotes: ` 52 ## Examples 53 54 If we had three target HTTP services that we wished to enrich each 55 document with - foo, bar and baz - where baz relies on the result of both foo 56 and bar, we might express that relationship here like so: 57 58 ` + "``` yaml" + ` 59 process_dag: 60 foo: 61 premap: 62 .: . 63 processors: 64 - http: 65 url: http://foo/enrich 66 postmap: 67 foo_result: . 68 69 bar: 70 premap: 71 .: msg.sub.path 72 processors: 73 - http: 74 url: http://bar/enrich 75 postmap: 76 bar_result: . 77 78 baz: 79 premap: 80 foo_obj: foo_result 81 bar_obj: bar_result 82 processors: 83 - http: 84 url: http://baz/enrich 85 postmap: 86 baz_obj: . 87 ` + "```" + ` 88 89 With this config the DAG would determine that the children foo and bar can be 90 executed in parallel, and once they are both finished we may proceed onto baz.`, 91 config: docs.FieldComponent().Map().WithChildren( 92 docs.FieldDeprecated("dependencies"), 93 docs.FieldDeprecated("conditions"), 94 docs.FieldDeprecated("parts"), 95 docs.FieldDeprecated("premap"), 96 docs.FieldDeprecated("premap_optional"), 97 docs.FieldDeprecated("processors").Array().HasType(docs.FieldTypeProcessor), 98 docs.FieldDeprecated("postmap"), 99 docs.FieldDeprecated("postmap_optional"), 100 ), 101 } 102 } 103 104 //------------------------------------------------------------------------------ 105 106 // DAGDepsConfig is a config containing dependency based configuration values 107 // for a ProcessDAG child. 108 type DAGDepsConfig struct { 109 Dependencies []string `json:"dependencies" yaml:"dependencies"` 110 } 111 112 // NewDAGDepsConfig returns a default DAGDepsConfig. 113 func NewDAGDepsConfig() DAGDepsConfig { 114 return DAGDepsConfig{ 115 Dependencies: []string{}, 116 } 117 } 118 119 // UnmarshalJSON ensures that when parsing configs that are in a slice the 120 // default values are still applied. 121 func (p *DAGDepsConfig) UnmarshalJSON(bytes []byte) error { 122 type confAlias DAGDepsConfig 123 aliased := confAlias(NewDAGDepsConfig()) 124 125 if err := json.Unmarshal(bytes, &aliased); err != nil { 126 return err 127 } 128 129 *p = DAGDepsConfig(aliased) 130 return nil 131 } 132 133 // UnmarshalYAML ensures that when parsing configs that are in a slice the 134 // default values are still applied. 135 func (p *DAGDepsConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { 136 type confAlias DAGDepsConfig 137 aliased := confAlias(NewDAGDepsConfig()) 138 139 if err := unmarshal(&aliased); err != nil { 140 return err 141 } 142 143 *p = DAGDepsConfig(aliased) 144 return nil 145 } 146 147 // DepProcessMapConfig contains a superset of a ProcessMap config and some DAG 148 // specific fields. 149 type DepProcessMapConfig struct { 150 DAGDepsConfig `json:",inline" yaml:",inline"` 151 ProcessMapConfig `json:",inline" yaml:",inline"` 152 } 153 154 // NewDepProcessMapConfig returns a default DepProcessMapConfig. 155 func NewDepProcessMapConfig() DepProcessMapConfig { 156 return DepProcessMapConfig{ 157 DAGDepsConfig: NewDAGDepsConfig(), 158 ProcessMapConfig: NewProcessMapConfig(), 159 } 160 } 161 162 //------------------------------------------------------------------------------ 163 164 // ProcessDAGConfig is a config struct containing fields for the 165 // ProcessDAG processor. 166 type ProcessDAGConfig map[string]DepProcessMapConfig 167 168 // NewProcessDAGConfig returns a default ProcessDAGConfig. 169 func NewProcessDAGConfig() ProcessDAGConfig { 170 return ProcessDAGConfig{} 171 } 172 173 //------------------------------------------------------------------------------ 174 175 // ProcessDAG is a processor that applies a list of child processors to a new 176 // payload mapped from the original, and after processing attempts to overlay 177 // the results back onto the original payloads according to more mappings. 178 type ProcessDAG struct { 179 children map[string]*ProcessMap 180 dag [][]string 181 182 log log.Modular 183 184 mCount metrics.StatCounter 185 mErr metrics.StatCounter 186 mSent metrics.StatCounter 187 mBatchSent metrics.StatCounter 188 } 189 190 var processDAGStageName = regexp.MustCompile("[a-zA-Z0-9-_]+") 191 192 // NewProcessDAG returns a ProcessField processor. 193 func NewProcessDAG( 194 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 195 ) (Type, error) { 196 children := map[string]*ProcessMap{} 197 explicitDeps := map[string][]string{} 198 199 for k, v := range conf.ProcessDAG { 200 if len(processDAGStageName.FindString(k)) != len(k) { 201 return nil, fmt.Errorf("workflow stage name '%v' contains invalid characters", k) 202 } 203 204 mMgr, mLog, mStats := interop.LabelChild(k, mgr, log, stats) 205 child, err := NewProcessMap(v.ProcessMapConfig, mMgr, mLog, mStats) 206 if err != nil { 207 return nil, fmt.Errorf("failed to create child process_map '%v': %v", k, err) 208 } 209 210 children[k] = child 211 explicitDeps[k] = v.Dependencies 212 } 213 214 dag, err := resolveProcessMapDAG(explicitDeps, children) 215 if err != nil { 216 return nil, err 217 } 218 219 p := &ProcessDAG{ 220 children: children, 221 dag: dag, 222 223 log: log, 224 225 mCount: stats.GetCounter("count"), 226 mErr: stats.GetCounter("error"), 227 mSent: stats.GetCounter("sent"), 228 mBatchSent: stats.GetCounter("batch.sent"), 229 } 230 231 p.log.Infof("Resolved DAG: %v\n", p.dag) 232 return p, nil 233 } 234 235 //------------------------------------------------------------------------------ 236 237 // ProcessMessage applies the processor to a message, either creating >0 238 // resulting messages or a response to be sent back to the message source. 239 func (p *ProcessDAG) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 240 p.mCount.Incr(1) 241 242 result := msg.DeepCopy() 243 result.Iter(func(i int, p types.Part) error { 244 _ = p.Get() 245 _, _ = p.JSON() 246 _ = p.Metadata() 247 return nil 248 }) 249 250 propMsg, propSpans := tracing.WithChildSpans(TypeProcessDAG, result) 251 252 for _, layer := range p.dag { 253 results := make([]types.Message, len(layer)) 254 errors := make([]error, len(layer)) 255 256 wg := sync.WaitGroup{} 257 wg.Add(len(layer)) 258 for i, eid := range layer { 259 go func(id string, index int) { 260 var resSpans []*tracing.Span 261 results[index], resSpans = tracing.WithChildSpans(id, propMsg.Copy()) 262 errors[index] = p.children[id].CreateResult(results[index]) 263 for _, s := range resSpans { 264 s.Finish() 265 } 266 wg.Done() 267 }(eid, i) 268 } 269 wg.Wait() 270 271 for i, id := range layer { 272 if err := errors[i]; err != nil { 273 p.log.Errorf("Failed to perform child '%v': %v\n", id, err) 274 result.Iter(func(i int, p types.Part) error { 275 FlagErr(p, err) 276 return nil 277 }) 278 continue 279 } 280 if failed, err := p.children[id].OverlayResult(result, results[i]); err != nil { 281 p.log.Errorf("Failed to overlay child '%v': %v\n", id, err) 282 result.Iter(func(i int, p types.Part) error { 283 FlagErr(p, err) 284 return nil 285 }) 286 continue 287 } else { 288 for _, j := range failed { 289 FlagErr(result.Get(j), fmt.Errorf("enrichment '%v' postmap failed", id)) 290 } 291 } 292 } 293 } 294 295 for _, s := range propSpans { 296 s.Finish() 297 } 298 299 p.mBatchSent.Incr(1) 300 p.mSent.Incr(int64(result.Len())) 301 302 msgs := [1]types.Message{result} 303 return msgs[:], nil 304 } 305 306 //------------------------------------------------------------------------------ 307 308 func getProcessMapDeps(id string, wanted []string, procs map[string]*ProcessMap) []string { 309 dependencies := []string{} 310 targetsNeeded := wanted 311 312 for k, v := range procs { 313 if k == id { 314 continue 315 } 316 for _, tp := range v.TargetsProvided() { 317 for _, tn := range targetsNeeded { 318 if strings.HasPrefix(tn, tp) { 319 dependencies = append(dependencies, k) 320 break 321 } 322 } 323 } 324 } 325 326 return dependencies 327 } 328 329 func resolveProcessMapDAG(explicitDeps map[string][]string, procs map[string]*ProcessMap) ([][]string, error) { 330 if len(procs) == 0 { 331 return [][]string{}, nil 332 } 333 targetProcs := map[string]struct{}{} 334 335 var entries []dependencysolver.Entry 336 for id, e := range procs { 337 wanted := explicitDeps[id] 338 wanted = append(wanted, e.TargetsUsed()...) 339 340 targetProcs[id] = struct{}{} 341 entries = append(entries, dependencysolver.Entry{ 342 ID: id, Deps: getProcessMapDeps(id, wanted, procs), 343 }) 344 } 345 layers := dependencysolver.LayeredTopologicalSort(entries) 346 for _, l := range layers { 347 for _, id := range l { 348 delete(targetProcs, id) 349 } 350 } 351 if len(targetProcs) > 0 { 352 var tProcs []string 353 for k := range targetProcs { 354 tProcs = append(tProcs, k) 355 } 356 return nil, fmt.Errorf("failed to resolve DAG, circular dependencies detected for targets: %v", tProcs) 357 } 358 return layers, nil 359 } 360 361 // CloseAsync shuts down the processor and stops processing requests. 362 func (p *ProcessDAG) CloseAsync() { 363 for _, c := range p.children { 364 c.CloseAsync() 365 } 366 } 367 368 // WaitForClose blocks until the processor has closed down. 369 func (p *ProcessDAG) WaitForClose(timeout time.Duration) error { 370 stopBy := time.Now().Add(timeout) 371 for _, c := range p.children { 372 if err := c.WaitForClose(time.Until(stopBy)); err != nil { 373 return err 374 } 375 } 376 return nil 377 } 378 379 //------------------------------------------------------------------------------