github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/protobuf.go (about)

     1  package processor
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"time"
     9  
    10  	"github.com/Jeffail/benthos/v3/internal/docs"
    11  	"github.com/Jeffail/benthos/v3/internal/tracing"
    12  	"github.com/Jeffail/benthos/v3/lib/log"
    13  	"github.com/Jeffail/benthos/v3/lib/metrics"
    14  	"github.com/Jeffail/benthos/v3/lib/types"
    15  
    16  	// nolint:staticcheck // Ignore SA1019 deprecation warning until we can switch to "google.golang.org/protobuf/types/dynamicpb"
    17  	"github.com/golang/protobuf/jsonpb"
    18  	// nolint:staticcheck // Ignore SA1019 deprecation warning until we can switch to "google.golang.org/protobuf/types/dynamicpb"
    19  	"github.com/golang/protobuf/proto"
    20  
    21  	"github.com/jhump/protoreflect/desc"
    22  	"github.com/jhump/protoreflect/desc/protoparse"
    23  	"github.com/jhump/protoreflect/dynamic"
    24  )
    25  
    26  //------------------------------------------------------------------------------
    27  
// init registers the protobuf processor in the Constructors table under the
// TypeProtobuf key. The registered TypeSpec carries the constructor
// (NewProtobuf) together with the documentation for the processor: summary,
// description of the to_json/from_json operators, field specs and two
// annotated config examples.
func init() {
	Constructors[TypeProtobuf] = TypeSpec{
		constructor: NewProtobuf,
		Categories: []Category{
			CategoryParsing,
		},
		Summary: `
Performs conversions to or from a protobuf message. This processor uses
reflection, meaning conversions can be made directly from the target .proto
files.`,
		Status: docs.StatusBeta,
		Description: `
The main functionality of this processor is to map to and from JSON documents,
you can read more about JSON mapping of protobuf messages here:
[https://developers.google.com/protocol-buffers/docs/proto3#json](https://developers.google.com/protocol-buffers/docs/proto3#json)

Using reflection for processing protobuf messages in this way is less performant
than generating and using native code. Therefore when performance is critical it
is recommended that you use Benthos plugins instead for processing protobuf
messages natively, you can find an example of Benthos plugins at
[https://github.com/benthosdev/benthos-plugin-example](https://github.com/benthosdev/benthos-plugin-example)

## Operators

### ` + "`to_json`" + `

Converts protobuf messages into a generic JSON structure. This makes it easier
to manipulate the contents of the document within Benthos.

### ` + "`from_json`" + `

Attempts to create a target protobuf message from a generic JSON structure.`,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("operator", "The [operator](#operators) to execute").HasOptions("to_json", "from_json"),
			docs.FieldCommon("message", "The fully qualified name of the protobuf message to convert to/from."),
			docs.FieldString("import_paths", "A list of directories containing .proto files, including all definitions required for parsing the target message. If left empty the current directory is used. Each directory listed will be walked with all found .proto files imported.").Array(),
			docs.FieldDeprecated("import_path"),
			PartsFieldSpec,
		},
		Examples: []docs.AnnotatedExample{
			{
				Title: "JSON to Protobuf",
				Summary: `
If we have the following protobuf definition within a directory called ` + "`testing/schema`" + `:

` + "```protobuf" + `
syntax = "proto3";
package testing;

import "google/protobuf/timestamp.proto";

message Person {
  string first_name = 1;
  string last_name = 2;
  string full_name = 3;
  int32 age = 4;
  int32 id = 5; // Unique ID number for this person.
  string email = 6;

  google.protobuf.Timestamp last_updated = 7;
}
` + "```" + `

And a stream of JSON documents of the form:

` + "```json" + `
{
	"firstName": "caleb",
	"lastName": "quaye",
	"email": "caleb@myspace.com"
}
` + "```" + `

We can convert the documents into protobuf messages with the following config:`,
				Config: `
pipeline:
  processors:
    - protobuf:
        operator: from_json
        message: testing.Person
        import_paths: [ testing/schema ]
`,
			},
			{
				Title: "Protobuf to JSON",
				Summary: `
If we have the following protobuf definition within a directory called ` + "`testing/schema`" + `:

` + "```protobuf" + `
syntax = "proto3";
package testing;

import "google/protobuf/timestamp.proto";

message Person {
  string first_name = 1;
  string last_name = 2;
  string full_name = 3;
  int32 age = 4;
  int32 id = 5; // Unique ID number for this person.
  string email = 6;

  google.protobuf.Timestamp last_updated = 7;
}
` + "```" + `

And a stream of protobuf messages of the type ` + "`Person`" + `, we could convert them into JSON documents of the format:

` + "```json" + `
{
	"firstName": "caleb",
	"lastName": "quaye",
	"email": "caleb@myspace.com"
}
` + "```" + `

With the following config:`,
				Config: `
pipeline:
  processors:
    - protobuf:
        operator: to_json
        message: testing.Person
        import_paths: [ testing/schema ]
`,
			},
		},
	}
}
   157  
   158  //------------------------------------------------------------------------------
   159  
// ProtobufConfig contains configuration fields for the Protobuf processor.
//
//   - Parts is handed to the part iterator by ProcessMessage; presumably it
//     selects which message parts the operator is applied to (see
//     PartsFieldSpec).
//   - Operator names the conversion to run, either "to_json" or "from_json";
//     any other value is rejected when the processor is constructed.
//   - Message is the fully qualified name of the protobuf message to convert
//     to/from. It must not be empty.
//   - ImportPaths lists directories that are walked for .proto files. When it
//     is empty the current directory is used.
//   - ImportPath is the deprecated single-directory form of ImportPaths. When
//     set, NewProtobuf appends it to the list of import paths.
type ProtobufConfig struct {
	Parts       []int    `json:"parts" yaml:"parts"`
	Operator    string   `json:"operator" yaml:"operator"`
	Message     string   `json:"message" yaml:"message"`
	ImportPaths []string `json:"import_paths" yaml:"import_paths"`
	ImportPath  string   `json:"import_path" yaml:"import_path"`
}
   168  
   169  // NewProtobufConfig returns a ProtobufConfig with default values.
   170  func NewProtobufConfig() ProtobufConfig {
   171  	return ProtobufConfig{
   172  		Parts:       []int{},
   173  		Operator:    "to_json",
   174  		Message:     "",
   175  		ImportPaths: []string{},
   176  		ImportPath:  "",
   177  	}
   178  }
   179  
   180  //------------------------------------------------------------------------------
   181  
// protobufOperator performs a conversion on a single message part. The
// operators built in this file read the part's contents, convert them, and
// write the result back onto the same part, returning an error if either the
// decode or the encode step fails.
type protobufOperator func(part types.Part) error
   183  
   184  func newProtobufToJSONOperator(message string, importPaths []string) (protobufOperator, error) {
   185  	if message == "" {
   186  		return nil, errors.New("message field must not be empty")
   187  	}
   188  
   189  	descriptors, err := loadDescriptors(importPaths)
   190  	if err != nil {
   191  		return nil, err
   192  	}
   193  
   194  	m := getMessageFromDescriptors(message, descriptors)
   195  	if m == nil {
   196  		return nil, fmt.Errorf("unable to find message '%v' definition within '%v'", message, importPaths)
   197  	}
   198  
   199  	marshaller := &jsonpb.Marshaler{
   200  		AnyResolver: dynamic.AnyResolver(dynamic.NewMessageFactoryWithDefaults(), descriptors...),
   201  	}
   202  
   203  	return func(part types.Part) error {
   204  		msg := dynamic.NewMessage(m)
   205  		if err := proto.Unmarshal(part.Get(), msg); err != nil {
   206  			return fmt.Errorf("failed to unmarshal message: %w", err)
   207  		}
   208  
   209  		data, err := msg.MarshalJSONPB(marshaller)
   210  		if err != nil {
   211  			return fmt.Errorf("failed to marshal protobuf message: %w", err)
   212  		}
   213  
   214  		part.Set(data)
   215  		return nil
   216  	}, nil
   217  }
   218  
   219  func newProtobufFromJSONOperator(message string, importPaths []string) (protobufOperator, error) {
   220  	if message == "" {
   221  		return nil, errors.New("message field must not be empty")
   222  	}
   223  
   224  	descriptors, err := loadDescriptors(importPaths)
   225  	if err != nil {
   226  		return nil, err
   227  	}
   228  
   229  	m := getMessageFromDescriptors(message, descriptors)
   230  	if m == nil {
   231  		return nil, fmt.Errorf("unable to find message '%v' definition within '%v'", message, importPaths)
   232  	}
   233  
   234  	unmarshaler := &jsonpb.Unmarshaler{
   235  		AnyResolver: dynamic.AnyResolver(dynamic.NewMessageFactoryWithDefaults(), descriptors...),
   236  	}
   237  
   238  	return func(part types.Part) error {
   239  		msg := dynamic.NewMessage(m)
   240  		if err := msg.UnmarshalJSONPB(unmarshaler, part.Get()); err != nil {
   241  			return fmt.Errorf("failed to unmarshal JSON message: %w", err)
   242  		}
   243  
   244  		data, err := msg.Marshal()
   245  		if err != nil {
   246  			return fmt.Errorf("failed to marshal protobuf message: %v", err)
   247  		}
   248  
   249  		part.Set(data)
   250  		return nil
   251  	}, nil
   252  }
   253  
   254  func strToProtobufOperator(opStr, message string, importPaths []string) (protobufOperator, error) {
   255  	switch opStr {
   256  	case "to_json":
   257  		return newProtobufToJSONOperator(message, importPaths)
   258  	case "from_json":
   259  		return newProtobufFromJSONOperator(message, importPaths)
   260  	}
   261  	return nil, fmt.Errorf("operator not recognised: %v", opStr)
   262  }
   263  
   264  func loadDescriptors(importPaths []string) ([]*desc.FileDescriptor, error) {
   265  	var parser protoparse.Parser
   266  	if len(importPaths) == 0 {
   267  		importPaths = []string{"."}
   268  	} else {
   269  		parser.ImportPaths = importPaths
   270  	}
   271  
   272  	var files []string
   273  	for _, importPath := range importPaths {
   274  		if err := filepath.Walk(importPath, func(path string, info os.FileInfo, ferr error) error {
   275  			if ferr != nil || info.IsDir() {
   276  				return ferr
   277  			}
   278  			if filepath.Ext(info.Name()) == ".proto" {
   279  				rPath, ferr := filepath.Rel(importPath, path)
   280  				if ferr != nil {
   281  					return fmt.Errorf("failed to get relative path: %v", ferr)
   282  				}
   283  				files = append(files, rPath)
   284  			}
   285  			return nil
   286  		}); err != nil {
   287  			return nil, err
   288  		}
   289  	}
   290  
   291  	fds, err := parser.ParseFiles(files...)
   292  	if err != nil {
   293  		return nil, fmt.Errorf("failed to parse .proto file: %v", err)
   294  	}
   295  	if len(fds) == 0 {
   296  		return nil, fmt.Errorf("no .proto files were found in the paths '%v'", importPaths)
   297  	}
   298  
   299  	return fds, err
   300  }
   301  
   302  func getMessageFromDescriptors(message string, fds []*desc.FileDescriptor) *desc.MessageDescriptor {
   303  	var msg *desc.MessageDescriptor
   304  	for _, fd := range fds {
   305  		msg = fd.FindMessage(message)
   306  		if msg != nil {
   307  			break
   308  		}
   309  	}
   310  	return msg
   311  }
   312  
   313  //------------------------------------------------------------------------------
   314  
// Protobuf is a processor that performs an operation on a Protobuf payload.
//
// parts is taken from the processor config and passed to the part iterator in
// ProcessMessage; operator is the conversion selected at construction time
// (to_json or from_json). The m* fields are the counters incremented by
// ProcessMessage: messages seen, operator failures, parts sent and batches
// sent.
type Protobuf struct {
	parts    []int
	operator protobufOperator

	conf  Config
	log   log.Modular
	stats metrics.Type

	mCount     metrics.StatCounter
	mErr       metrics.StatCounter
	mSent      metrics.StatCounter
	mBatchSent metrics.StatCounter
}
   329  
   330  // NewProtobuf returns an Protobuf processor.
   331  func NewProtobuf(
   332  	conf Config, mgr types.Manager, log log.Modular, stats metrics.Type,
   333  ) (Type, error) {
   334  	p := &Protobuf{
   335  		parts: conf.Protobuf.Parts,
   336  		conf:  conf,
   337  		log:   log,
   338  		stats: stats,
   339  
   340  		mCount:     stats.GetCounter("count"),
   341  		mErr:       stats.GetCounter("error"),
   342  		mSent:      stats.GetCounter("sent"),
   343  		mBatchSent: stats.GetCounter("batch.sent"),
   344  	}
   345  
   346  	importPaths := conf.Protobuf.ImportPaths
   347  	if len(conf.Protobuf.ImportPath) > 0 {
   348  		importPaths = append(importPaths, conf.Protobuf.ImportPath)
   349  	}
   350  
   351  	var err error
   352  	if p.operator, err = strToProtobufOperator(conf.Protobuf.Operator, conf.Protobuf.Message, importPaths); err != nil {
   353  		return nil, err
   354  	}
   355  	return p, nil
   356  }
   357  
   358  //------------------------------------------------------------------------------
   359  
   360  // ProcessMessage applies the processor to a message, either creating >0
   361  // resulting messages or a response to be sent back to the message source.
   362  func (p *Protobuf) ProcessMessage(msg types.Message) ([]types.Message, types.Response) {
   363  	p.mCount.Incr(1)
   364  	newMsg := msg.Copy()
   365  
   366  	proc := func(index int, span *tracing.Span, part types.Part) error {
   367  		if err := p.operator(part); err != nil {
   368  			p.mErr.Incr(1)
   369  			p.log.Debugf("Operator failed: %v\n", err)
   370  			return err
   371  		}
   372  		return nil
   373  	}
   374  
   375  	IteratePartsWithSpanV2(TypeProtobuf, p.parts, newMsg, proc)
   376  
   377  	p.mBatchSent.Incr(1)
   378  	p.mSent.Incr(int64(newMsg.Len()))
   379  	return []types.Message{newMsg}, nil
   380  }
   381  
// CloseAsync shuts down the processor and stops processing requests. This
// implementation is a no-op: the method body is empty.
func (p *Protobuf) CloseAsync() {
}
   385  
// WaitForClose blocks until the processor has closed down. This
// implementation returns nil immediately and ignores the timeout.
func (p *Protobuf) WaitForClose(timeout time.Duration) error {
	return nil
}
   390  
   391  //------------------------------------------------------------------------------