github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/xml.go (about) 1 package processor 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/Jeffail/benthos/v3/internal/docs" 8 "github.com/Jeffail/benthos/v3/internal/tracing" 9 "github.com/Jeffail/benthos/v3/internal/xml" 10 "github.com/Jeffail/benthos/v3/lib/log" 11 "github.com/Jeffail/benthos/v3/lib/metrics" 12 "github.com/Jeffail/benthos/v3/lib/types" 13 ) 14 15 func init() { 16 Constructors[TypeXML] = TypeSpec{ 17 constructor: NewXML, 18 Status: docs.StatusBeta, 19 Categories: []Category{ 20 CategoryParsing, 21 }, 22 Summary: ` 23 Parses messages as an XML document, performs a mutation on the data, and then 24 overwrites the previous contents with the new value.`, 25 Description: ` 26 ## Operators 27 28 ### ` + "`to_json`" + ` 29 30 Converts an XML document into a JSON structure, where elements appear as keys of 31 an object according to the following rules: 32 33 - If an element contains attributes they are parsed by prefixing a hyphen, 34 ` + "`-`" + `, to the attribute label. 35 - If the element is a simple element and has attributes, the element value 36 is given the key ` + "`#text`" + `. 37 - XML comments, directives, and process instructions are ignored. 38 - When elements are repeated the resulting JSON value is an array. 39 40 For example, given the following XML: 41 42 ` + "```xml" + ` 43 <root> 44 <title>This is a title</title> 45 <description tone="boring">This is a description</description> 46 <elements id="1">foo1</elements> 47 <elements id="2">foo2</elements> 48 <elements>foo3</elements> 49 </root> 50 ` + "```" + ` 51 52 The resulting JSON structure would look like this: 53 54 ` + "```json" + ` 55 { 56 "root":{ 57 "title":"This is a title", 58 "description":{ 59 "#text":"This is a description", 60 "-tone":"boring" 61 }, 62 "elements":[ 63 {"#text":"foo1","-id":"1"}, 64 {"#text":"foo2","-id":"2"}, 65 "foo3" 66 ] 67 } 68 } 69 ` + "```" + ` 70 71 With cast set to true, the resulting JSON structure would look like this: 72 73 ` + "```json" + ` 74 { 75 "root":{ 76 "title":"This is a title", 77 "description":{ 78 "#text":"This is a description", 79 "-tone":"boring" 80 }, 81 "elements":[ 82 {"#text":"foo1","-id":1}, 83 {"#text":"foo2","-id":2}, 84 "foo3" 85 ] 86 } 87 } 88 ` + "```" + ``, 89 FieldSpecs: docs.FieldSpecs{ 90 docs.FieldCommon("operator", "An XML [operation](#operators) to apply to messages.").HasOptions("to_json"), 91 docs.FieldCommon("cast", "Whether to try to cast values that are numbers and booleans to the right type. Default: all values are strings."), 92 PartsFieldSpec, 93 }, 94 } 95 } 96 97 //------------------------------------------------------------------------------ 98 99 // XMLConfig contains configuration fields for the XML processor. 100 type XMLConfig struct { 101 Parts []int `json:"parts" yaml:"parts"` 102 Operator string `json:"operator" yaml:"operator"` 103 Cast bool `json:"cast" yaml:"cast"` 104 } 105 106 // NewXMLConfig returns a XMLConfig with default values. 107 func NewXMLConfig() XMLConfig { 108 return XMLConfig{ 109 Parts: []int{}, 110 Operator: "to_json", 111 Cast: false, 112 } 113 } 114 115 //------------------------------------------------------------------------------ 116 117 // XML is a processor that performs an operation on a XML payload. 118 type XML struct { 119 parts []int 120 121 conf Config 122 log log.Modular 123 stats metrics.Type 124 125 mCount metrics.StatCounter 126 mErr metrics.StatCounter 127 mSent metrics.StatCounter 128 mBatchSent metrics.StatCounter 129 } 130 131 // NewXML returns a XML processor. 132 func NewXML( 133 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 134 ) (Type, error) { 135 if conf.XML.Operator != "to_json" { 136 return nil, fmt.Errorf("operator not recognised: %v", conf.XML.Operator) 137 } 138 139 j := &XML{ 140 parts: conf.XML.Parts, 141 conf: conf, 142 log: log, 143 stats: stats, 144 145 mCount: stats.GetCounter("count"), 146 mErr: stats.GetCounter("error"), 147 mSent: stats.GetCounter("sent"), 148 mBatchSent: stats.GetCounter("batch.sent"), 149 } 150 return j, nil 151 } 152 153 //------------------------------------------------------------------------------ 154 155 // ProcessMessage applies the processor to a message, either creating >0 156 // resulting messages or a response to be sent back to the message source. 157 func (p *XML) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 158 p.mCount.Incr(1) 159 newMsg := msg.Copy() 160 161 proc := func(index int, span *tracing.Span, part types.Part) error { 162 root, err := xml.ToMap(part.Get(), p.conf.XML.Cast) 163 if err != nil { 164 p.mErr.Incr(1) 165 p.log.Debugf("Failed to parse part as XML: %v\n", err) 166 return err 167 } 168 if err = part.SetJSON(root); err != nil { 169 p.mErr.Incr(1) 170 p.log.Debugf("Failed to marshal XML as JSON: %v\n", err) 171 return err 172 } 173 return nil 174 } 175 176 IteratePartsWithSpanV2(TypeXML, p.parts, newMsg, proc) 177 178 p.mBatchSent.Incr(1) 179 p.mSent.Incr(int64(newMsg.Len())) 180 return []types.Message{newMsg}, nil 181 } 182 183 // CloseAsync shuts down the processor and stops processing requests. 184 func (p *XML) CloseAsync() { 185 } 186 187 // WaitForClose blocks until the processor has closed down. 188 func (p *XML) WaitForClose(timeout time.Duration) error { 189 return nil 190 }