github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/xml.go (about)

     1  package processor
     2  
     3  import (
     4  	"fmt"
     5  	"time"
     6  
     7  	"github.com/Jeffail/benthos/v3/internal/docs"
     8  	"github.com/Jeffail/benthos/v3/internal/tracing"
     9  	"github.com/Jeffail/benthos/v3/internal/xml"
    10  	"github.com/Jeffail/benthos/v3/lib/log"
    11  	"github.com/Jeffail/benthos/v3/lib/metrics"
    12  	"github.com/Jeffail/benthos/v3/lib/types"
    13  )
    14  
    15  func init() {
    16  	Constructors[TypeXML] = TypeSpec{
    17  		constructor: NewXML,
    18  		Status:      docs.StatusBeta,
    19  		Categories: []Category{
    20  			CategoryParsing,
    21  		},
    22  		Summary: `
    23  Parses messages as an XML document, performs a mutation on the data, and then
    24  overwrites the previous contents with the new value.`,
    25  		Description: `
    26  ## Operators
    27  
    28  ### ` + "`to_json`" + `
    29  
    30  Converts an XML document into a JSON structure, where elements appear as keys of
    31  an object according to the following rules:
    32  
    33  - If an element contains attributes they are parsed by prefixing a hyphen,
    34    ` + "`-`" + `, to the attribute label.
    35  - If the element is a simple element and has attributes, the element value
    36    is given the key ` + "`#text`" + `.
    37  - XML comments, directives, and process instructions are ignored.
    38  - When elements are repeated the resulting JSON value is an array.
    39  
    40  For example, given the following XML:
    41  
    42  ` + "```xml" + `
    43  <root>
    44    <title>This is a title</title>
    45    <description tone="boring">This is a description</description>
    46    <elements id="1">foo1</elements>
    47    <elements id="2">foo2</elements>
    48    <elements>foo3</elements>
    49  </root>
    50  ` + "```" + `
    51  
    52  The resulting JSON structure would look like this:
    53  
    54  ` + "```json" + `
    55  {
    56    "root":{
    57      "title":"This is a title",
    58      "description":{
    59        "#text":"This is a description",
    60        "-tone":"boring"
    61      },
    62      "elements":[
    63        {"#text":"foo1","-id":"1"},
    64        {"#text":"foo2","-id":"2"},
    65        "foo3"
    66      ]
    67    }
    68  }
    69  ` + "```" + `
    70  
    71  With cast set to true, the resulting JSON structure would look like this:
    72  
    73  ` + "```json" + `
    74  {
    75    "root":{
    76      "title":"This is a title",
    77      "description":{
    78        "#text":"This is a description",
    79        "-tone":"boring"
    80      },
    81      "elements":[
    82        {"#text":"foo1","-id":1},
    83        {"#text":"foo2","-id":2},
    84        "foo3"
    85      ]
    86    }
    87  }
    88  ` + "```" + ``,
    89  		FieldSpecs: docs.FieldSpecs{
    90  			docs.FieldCommon("operator", "An XML [operation](#operators) to apply to messages.").HasOptions("to_json"),
    91  			docs.FieldCommon("cast", "Whether to try to cast values that are numbers and booleans to the right type. Default: all values are strings."),
    92  			PartsFieldSpec,
    93  		},
    94  	}
    95  }
    96  
    97  //------------------------------------------------------------------------------
    98  
    99  // XMLConfig contains configuration fields for the XML processor.
   100  type XMLConfig struct {
   101  	Parts    []int  `json:"parts" yaml:"parts"`
   102  	Operator string `json:"operator" yaml:"operator"`
   103  	Cast     bool   `json:"cast" yaml:"cast"`
   104  }
   105  
   106  // NewXMLConfig returns a XMLConfig with default values.
   107  func NewXMLConfig() XMLConfig {
   108  	return XMLConfig{
   109  		Parts:    []int{},
   110  		Operator: "to_json",
   111  		Cast:     false,
   112  	}
   113  }
   114  
   115  //------------------------------------------------------------------------------
   116  
   117  // XML is a processor that performs an operation on a XML payload.
   118  type XML struct {
   119  	parts []int
   120  
   121  	conf  Config
   122  	log   log.Modular
   123  	stats metrics.Type
   124  
   125  	mCount     metrics.StatCounter
   126  	mErr       metrics.StatCounter
   127  	mSent      metrics.StatCounter
   128  	mBatchSent metrics.StatCounter
   129  }
   130  
   131  // NewXML returns a XML processor.
   132  func NewXML(
   133  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   134  ) (Type, error) {
   135  	if conf.XML.Operator != "to_json" {
   136  		return nil, fmt.Errorf("operator not recognised: %v", conf.XML.Operator)
   137  	}
   138  
   139  	j := &XML{
   140  		parts: conf.XML.Parts,
   141  		conf:  conf,
   142  		log:   log,
   143  		stats: stats,
   144  
   145  		mCount:     stats.GetCounter("count"),
   146  		mErr:       stats.GetCounter("error"),
   147  		mSent:      stats.GetCounter("sent"),
   148  		mBatchSent: stats.GetCounter("batch.sent"),
   149  	}
   150  	return j, nil
   151  }
   152  
   153  //------------------------------------------------------------------------------
   154  
   155  // ProcessMessage applies the processor to a message, either creating >0
   156  // resulting messages or a response to be sent back to the message source.
   157  func (p *XML) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   158  	p.mCount.Incr(1)
   159  	newMsg := msg.Copy()
   160  
   161  	proc := func(index int, span *tracing.Span, part types.Part) error {
   162  		root, err := xml.ToMap(part.Get(), p.conf.XML.Cast)
   163  		if err != nil {
   164  			p.mErr.Incr(1)
   165  			p.log.Debugf("Failed to parse part as XML: %v\n", err)
   166  			return err
   167  		}
   168  		if err = part.SetJSON(root); err != nil {
   169  			p.mErr.Incr(1)
   170  			p.log.Debugf("Failed to marshal XML as JSON: %v\n", err)
   171  			return err
   172  		}
   173  		return nil
   174  	}
   175  
   176  	IteratePartsWithSpanV2(TypeXML, p.parts, newMsg, proc)
   177  
   178  	p.mBatchSent.Incr(1)
   179  	p.mSent.Incr(int64(newMsg.Len()))
   180  	return []types.Message{newMsg}, nil
   181  }
   182  
   183  // CloseAsync shuts down the processor and stops processing requests.
   184  func (p *XML) CloseAsync() {
   185  }
   186  
   187  // WaitForClose blocks until the processor has closed down.
   188  func (p *XML) WaitForClose(timeout time.Duration) error {
   189  	return nil
   190  }