github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/process_map.go (about) 1 package processor 2 3 import ( 4 "encoding/json" 5 "errors" 6 "fmt" 7 "time" 8 9 "github.com/Jeffail/benthos/v3/internal/docs" 10 "github.com/Jeffail/benthos/v3/internal/interop" 11 "github.com/Jeffail/benthos/v3/internal/tracing" 12 "github.com/Jeffail/benthos/v3/lib/condition" 13 "github.com/Jeffail/benthos/v3/lib/log" 14 "github.com/Jeffail/benthos/v3/lib/message/mapper" 15 "github.com/Jeffail/benthos/v3/lib/metrics" 16 "github.com/Jeffail/benthos/v3/lib/types" 17 ) 18 19 //------------------------------------------------------------------------------ 20 21 var processMapFields = docs.FieldSpecs{ 22 docs.FieldCommon("conditions", "A list of [conditions](/docs/components/conditions/about) to test against messages. If any condition fails then the message will not be mapped and processed.", 23 []interface{}{ 24 map[string]interface{}{ 25 "bloblang": "document.urls.length() > 0", 26 }, 27 }, 28 ).Array().HasType(docs.FieldTypeCondition), 29 docs.FieldString( 30 "premap", "A map of source to destination [paths](/docs/configuration/field_paths) used to create a new object from the original. An empty (or dot `.`) path indicates the root of the object. If a map source is not found then the message will not be processed, for optional sources use the field [`premap_optional`](#premap_optional).", 31 map[string]string{ 32 ".": "field.from.document", 33 }, 34 map[string]string{ 35 "foo": "root.body.foo", 36 "bar.baz": "root.extra.baz", 37 }, 38 ).Map(), 39 docs.FieldString("premap_optional", "A map of optional source to destination [paths](/docs/configuration/field_paths) used to create a new object from the original.").Map(), 40 docs.FieldCommon("processors", "A list of processors to apply to mapped payloads.").Array().HasType(docs.FieldTypeProcessor), 41 docs.FieldString( 42 "postmap", "A map of destination to source [paths](/docs/configuration/field_paths) used to map results from processing back into the original payload. An empty (or dot `.`) path indicates the root of the object. If a source is not found then the mapping is abandoned, for optional sources use the [`postmap_optional`](#postmap_optional) field.", 43 map[string]string{ 44 "results.foo": ".", 45 }, 46 ).Map(), 47 docs.FieldString("postmap_optional", "A map of optional destination to source [paths](/docs/configuration/field_paths) used to map results from processing back into the original payload.").Map(), 48 PartsFieldSpec, 49 } 50 51 func init() { 52 Constructors[TypeProcessMap] = TypeSpec{ 53 constructor: func(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) { 54 return NewProcessMap(conf.ProcessMap, mgr, log, stats) 55 }, 56 FieldSpecs: processMapFields, 57 Summary: ` 58 A processor that extracts and maps fields identified via 59 [dot path](/docs/configuration/field_paths) from the original payload into a new 60 object, applies a list of processors to the newly constructed object, and 61 finally maps the result back into the original payload.`, 62 Status: docs.StatusDeprecated, 63 Description: ` 64 ## Alternatives 65 66 All functionality of this processor has been superseded by the 67 [branch](/docs/components/processors/branch) processor. 68 69 This processor is useful for performing processors on subsections of a payload. 70 For example, you could extract sections of a JSON object in order to construct 71 a reduced request object for an ` + "[`http`](/docs/components/processors/http)" + ` 72 processor, then map the result back into a field within the original object. 73 74 The order of stages of this processor are as follows: 75 76 - [Conditions](#conditions) are tested (if specified) against each message, 77 messages that do not pass will not be processed. 78 - Messages that are flagged for processing are mapped according to the 79 [premap](#premap) fields, creating a new object. If the premap stage fails 80 (targets are not found) the message will not be processed. 81 - Messages that are mapped are processed as a batch. 82 - After all child processors are applied to the mapped messages they are mapped 83 back into the original messages they originated from following the 84 [postmap](#postmap) fields. If the postmap stage fails the mapping is skipped 85 and the message payload remains as it started. 86 87 If the premap is empty then the full payload is sent to the processors, if the 88 postmap is empty then the processed result replaces the original contents 89 entirely. 90 91 ### Batch Ordering 92 93 This processor supports batched messages, but the list of processors to apply 94 must NOT change the ordering (or count) of the messages (do not use a 95 ` + "`group_by`" + ` processor, for example). 96 97 ### Error Handling 98 99 When premap, processing or postmap stages fail the underlying message will 100 remain unchanged, the errors are logged, and the message is flagged as having 101 failed, allowing you to use 102 [standard processor error handling patterns](/docs/configuration/error_handling) 103 for recovery.`, 104 Footnotes: ` 105 ## Examples 106 107 Given a message payload of: 108 109 ` + "```json" + ` 110 { 111 "doc": { 112 "id": "foo", 113 "title": "foo bar baz", 114 "description": "here's a thing", 115 "content": "this is a body" 116 } 117 } 118 ` + "```" + ` 119 120 We might wish to perform language detection on the ` + "`doc.content`" + ` field 121 by sending it to a hypothetical HTTP service. We do not wish to overwrite the 122 original document with the result, and instead want to place it within the path 123 ` + "`doc.language`" + `, and so this is a good use case for ` + "`process_map`" + `: 124 125 ` + "```yaml" + ` 126 pipeline: 127 processors: 128 - process_map: 129 premap: 130 content: doc.content 131 processors: 132 - http: 133 url: http://localhost:1234 134 postmap: 135 doc.language: . 136 ` + "```" + ` 137 138 With the above config we would send our target HTTP service the payload 139 ` + "`{\"content\":\"this is a body\"}`" + `, and whatever the service returns 140 will get mapped into our original document: 141 142 ` + "```json" + ` 143 { 144 "doc": { 145 "id": "foo", 146 "title": "foo bar baz", 147 "description": "here's a thing", 148 "content": "this is a body", 149 "language": { 150 "code": "en", 151 "certainty": 0.2 152 } 153 } 154 } 155 ` + "```" + ``, 156 } 157 } 158 159 //------------------------------------------------------------------------------ 160 161 // ProcessMapConfig is a config struct containing fields for the 162 // ProcessMap processor. 163 type ProcessMapConfig struct { 164 Parts []int `json:"parts" yaml:"parts"` 165 Conditions []condition.Config `json:"conditions" yaml:"conditions"` 166 Premap map[string]string `json:"premap" yaml:"premap"` 167 PremapOptional map[string]string `json:"premap_optional" yaml:"premap_optional"` 168 Postmap map[string]string `json:"postmap" yaml:"postmap"` 169 PostmapOptional map[string]string `json:"postmap_optional" yaml:"postmap_optional"` 170 Processors []Config `json:"processors" yaml:"processors"` 171 } 172 173 // NewProcessMapConfig returns a default ProcessMapConfig. 174 func NewProcessMapConfig() ProcessMapConfig { 175 return ProcessMapConfig{ 176 Parts: []int{}, 177 Conditions: []condition.Config{}, 178 Premap: map[string]string{}, 179 PremapOptional: map[string]string{}, 180 Postmap: map[string]string{}, 181 PostmapOptional: map[string]string{}, 182 Processors: []Config{}, 183 } 184 } 185 186 // Sanitise the configuration into a minimal structure that can be printed 187 // without changing the intent. 188 func (p ProcessMapConfig) Sanitise() (map[string]interface{}, error) { 189 var err error 190 condConfs := make([]interface{}, len(p.Conditions)) 191 for i, cConf := range p.Conditions { 192 if condConfs[i], err = condition.SanitiseConfig(cConf); err != nil { 193 return nil, err 194 } 195 } 196 procConfs := make([]interface{}, len(p.Processors)) 197 for i, pConf := range p.Processors { 198 if procConfs[i], err = SanitiseConfig(pConf); err != nil { 199 return nil, err 200 } 201 } 202 return map[string]interface{}{ 203 "parts": p.Parts, 204 "conditions": condConfs, 205 "premap": p.Premap, 206 "premap_optional": p.PremapOptional, 207 "postmap": p.Postmap, 208 "postmap_optional": p.PostmapOptional, 209 "processors": procConfs, 210 }, nil 211 } 212 213 //------------------------------------------------------------------------------ 214 215 // UnmarshalJSON ensures that when parsing configs that are in a slice the 216 // default values are still applied. 217 func (p *ProcessMapConfig) UnmarshalJSON(bytes []byte) error { 218 type confAlias ProcessMapConfig 219 aliased := confAlias(NewProcessMapConfig()) 220 221 if err := json.Unmarshal(bytes, &aliased); err != nil { 222 return err 223 } 224 225 *p = ProcessMapConfig(aliased) 226 return nil 227 } 228 229 // UnmarshalYAML ensures that when parsing configs that are in a slice the 230 // default values are still applied. 231 func (p *ProcessMapConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { 232 type confAlias ProcessMapConfig 233 aliased := confAlias(NewProcessMapConfig()) 234 235 if err := unmarshal(&aliased); err != nil { 236 return err 237 } 238 239 *p = ProcessMapConfig(aliased) 240 return nil 241 } 242 243 //------------------------------------------------------------------------------ 244 245 // ProcessMap is a processor that applies a list of child processors to a new 246 // payload mapped from the original, and after processing attempts to overlay 247 // the results back onto the original payloads according to more mappings. 248 type ProcessMap struct { 249 parts []int 250 251 mapper *mapper.Type 252 children []types.Processor 253 254 log log.Modular 255 256 mCount metrics.StatCounter 257 mErr metrics.StatCounter 258 mErrPre metrics.StatCounter 259 mErrProc metrics.StatCounter 260 mErrPost metrics.StatCounter 261 mSent metrics.StatCounter 262 mBatchSent metrics.StatCounter 263 } 264 265 // NewProcessMap returns a ProcessField processor. 266 func NewProcessMap( 267 conf ProcessMapConfig, mgr types.Manager, log log.Modular, stats metrics.Type, 268 ) (*ProcessMap, error) { 269 var children []types.Processor 270 for i, pconf := range conf.Processors { 271 pMgr, pLog, pStats := interop.LabelChild(fmt.Sprintf("processor.%v", i), mgr, log, stats) 272 proc, err := New(pconf, pMgr, pLog, pStats) 273 if err != nil { 274 return nil, err 275 } 276 children = append(children, proc) 277 } 278 279 var conditions []types.Condition 280 for i, cconf := range conf.Conditions { 281 cMgr, cLog, cStats := interop.LabelChild(fmt.Sprintf("condition.%v", i), mgr, log, stats) 282 cond, err := condition.New(cconf, cMgr, cLog, cStats) 283 if err != nil { 284 return nil, err 285 } 286 conditions = append(conditions, cond) 287 } 288 289 p := &ProcessMap{ 290 parts: conf.Parts, 291 292 children: children, 293 294 log: log, 295 mCount: stats.GetCounter("count"), 296 mErr: stats.GetCounter("error"), 297 mErrPre: stats.GetCounter("error.premap"), 298 mErrProc: stats.GetCounter("error.processors"), 299 mErrPost: stats.GetCounter("error.postmap"), 300 mSent: stats.GetCounter("sent"), 301 mBatchSent: stats.GetCounter("batch.sent"), 302 } 303 304 var err error 305 if p.mapper, err = mapper.New( 306 mapper.OptSetLogger(log), 307 mapper.OptSetStats(stats), 308 mapper.OptSetConditions(conditions), 309 mapper.OptSetReqMap(conf.Premap), 310 mapper.OptSetOptReqMap(conf.PremapOptional), 311 mapper.OptSetResMap(conf.Postmap), 312 mapper.OptSetOptResMap(conf.PostmapOptional), 313 ); err != nil { 314 return nil, err 315 } 316 317 return p, nil 318 } 319 320 //------------------------------------------------------------------------------ 321 322 // ProcessMessage applies the processor to a message, either creating >0 323 // resulting messages or a response to be sent back to the message source. 324 func (p *ProcessMap) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 325 propMsg, propSpans := tracing.WithChildSpans(TypeProcessMap, msg.Copy()) 326 defer func() { 327 for _, s := range propSpans { 328 s.Finish() 329 } 330 }() 331 332 result := msg.DeepCopy() 333 err := p.CreateResult(propMsg) 334 if err != nil { 335 result.Iter(func(i int, p types.Part) error { 336 FlagErr(p, err) 337 return nil 338 }) 339 msgs := [1]types.Message{result} 340 return msgs[:], nil 341 } 342 343 var failed []int 344 if failed, err = p.OverlayResult(result, propMsg); err != nil { 345 result.Iter(func(i int, p types.Part) error { 346 FlagErr(p, err) 347 return nil 348 }) 349 msgs := [1]types.Message{result} 350 return msgs[:], nil 351 } 352 for _, i := range failed { 353 FlagErr(result.Get(i), errors.New("failed to overlay result from map processors")) 354 } 355 356 msgs := [1]types.Message{result} 357 return msgs[:], nil 358 } 359 360 // TargetsUsed returns a list of target dependencies of this processor derived 361 // from its premap and premap_optional fields. 362 func (p *ProcessMap) TargetsUsed() []string { 363 return p.mapper.TargetsUsed() 364 } 365 366 // TargetsProvided returns a list of targets provided by this processor derived 367 // from its postmap and postmap_optional fields. 368 func (p *ProcessMap) TargetsProvided() []string { 369 return p.mapper.TargetsProvided() 370 } 371 372 // CreateResult performs reduction and child processors to a payload. The size 373 // of the payload will remain unchanged, where reduced indexes are nil. This 374 // result can be overlayed onto the original message in order to complete the 375 // map. 376 func (p *ProcessMap) CreateResult(msg types.Message) error { 377 p.mCount.Incr(1) 378 379 if len(p.parts) > 0 { 380 parts := make([]types.Part, msg.Len()) 381 for _, sel := range p.parts { 382 index := sel 383 if index < 0 { 384 index = msg.Len() + index 385 } 386 if index < 0 || index >= msg.Len() { 387 continue 388 } 389 parts[index] = msg.Get(index) 390 } 391 msg.SetAll(parts) 392 } 393 394 originalLen := msg.Len() 395 396 skipped, failed := p.mapper.MapRequests(msg) 397 if msg.Len() == 0 { 398 msg.SetAll(make([]types.Part, originalLen)) 399 errMapFailed := errors.New("mapping failed for this message") 400 for _, i := range failed { 401 FlagErr(msg.Get(i), errMapFailed) 402 } 403 return nil 404 } 405 406 procResults, err := processMap(msg, p.children) 407 if err != nil { 408 p.mErrProc.Incr(1) 409 p.mErr.Incr(1) 410 p.log.Errorf("Processors failed: %v\n", err) 411 return err 412 } 413 414 var alignedResult types.Message 415 if alignedResult, err = p.mapper.AlignResult(originalLen, skipped, failed, procResults); err != nil { 416 p.mErrPost.Incr(1) 417 p.mErr.Incr(1) 418 p.log.Errorf("Postmap failed: %v\n", err) 419 return err 420 } 421 422 for _, i := range failed { 423 FlagFail(alignedResult.Get(i)) 424 } 425 426 alignedParts := make([]types.Part, alignedResult.Len()) 427 for i := range alignedParts { 428 alignedParts[i] = alignedResult.Get(i) 429 } 430 msg.SetAll(alignedParts) 431 return nil 432 } 433 434 // OverlayResult attempts to merge the result of a process_map with the original 435 // payload as per the map specified in the postmap and postmap_optional fields. 436 func (p *ProcessMap) OverlayResult(payload, response types.Message) ([]int, error) { 437 failed, err := p.mapper.MapResponses(payload, response) 438 if err != nil { 439 p.mErrPost.Incr(1) 440 p.mErr.Incr(1) 441 p.log.Errorf("Postmap failed: %v\n", err) 442 return nil, err 443 } 444 445 p.mBatchSent.Incr(1) 446 p.mSent.Incr(int64(payload.Len())) 447 return failed, nil 448 } 449 450 func processMap(mappedMsg types.Message, processors []types.Processor) ([]types.Message, error) { 451 requestMsgs, res := ExecuteAll(processors, mappedMsg) 452 if res != nil && res.Error() != nil { 453 return nil, res.Error() 454 } 455 456 if len(requestMsgs) == 0 { 457 return nil, errors.New("processors resulted in zero messages") 458 } 459 460 return requestMsgs, nil 461 } 462 463 // CloseAsync shuts down the processor and stops processing requests. 464 func (p *ProcessMap) CloseAsync() { 465 for _, c := range p.children { 466 c.CloseAsync() 467 } 468 } 469 470 // WaitForClose blocks until the processor has closed down. 471 func (p *ProcessMap) WaitForClose(timeout time.Duration) error { 472 stopBy := time.Now().Add(timeout) 473 for _, c := range p.children { 474 if err := c.WaitForClose(time.Until(stopBy)); err != nil { 475 return err 476 } 477 } 478 return nil 479 }