package processor

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"time"

	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/internal/tracing"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/types"

	// nolint:staticcheck // Ignore SA1019 deprecation warning until we can switch to "google.golang.org/protobuf/types/dynamicpb"
	"github.com/golang/protobuf/jsonpb"
	// nolint:staticcheck // Ignore SA1019 deprecation warning until we can switch to "google.golang.org/protobuf/types/dynamicpb"
	"github.com/golang/protobuf/proto"

	"github.com/jhump/protoreflect/desc"
	"github.com/jhump/protoreflect/desc/protoparse"
	"github.com/jhump/protoreflect/dynamic"
)

//------------------------------------------------------------------------------

// init registers the protobuf processor constructor, documentation and
// examples in the global Constructors map so the processor can be built
// from a pipeline config under the key TypeProtobuf.
func init() {
	Constructors[TypeProtobuf] = TypeSpec{
		constructor: NewProtobuf,
		Categories: []Category{
			CategoryParsing,
		},
		Summary: `
Performs conversions to or from a protobuf message. This processor uses
reflection, meaning conversions can be made directly from the target .proto
files.`,
		Status: docs.StatusBeta,
		Description: `
The main functionality of this processor is to map to and from JSON documents,
you can read more about JSON mapping of protobuf messages here:
[https://developers.google.com/protocol-buffers/docs/proto3#json](https://developers.google.com/protocol-buffers/docs/proto3#json)

Using reflection for processing protobuf messages in this way is less performant
than generating and using native code. Therefore when performance is critical it
is recommended that you use Benthos plugins instead for processing protobuf
messages natively, you can find an example of Benthos plugins at
[https://github.com/benthosdev/benthos-plugin-example](https://github.com/benthosdev/benthos-plugin-example)

## Operators

### ` + "`to_json`" + `

Converts protobuf messages into a generic JSON structure. This makes it easier
to manipulate the contents of the document within Benthos.

### ` + "`from_json`" + `

Attempts to create a target protobuf message from a generic JSON structure.`,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("operator", "The [operator](#operators) to execute").HasOptions("to_json", "from_json"),
			docs.FieldCommon("message", "The fully qualified name of the protobuf message to convert to/from."),
			docs.FieldString("import_paths", "A list of directories containing .proto files, including all definitions required for parsing the target message. If left empty the current directory is used. Each directory listed will be walked with all found .proto files imported.").Array(),
			docs.FieldDeprecated("import_path"),
			PartsFieldSpec,
		},
		Examples: []docs.AnnotatedExample{
			{
				Title: "JSON to Protobuf",
				Summary: `
If we have the following protobuf definition within a directory called ` + "`testing/schema`" + `:

` + "```protobuf" + `
syntax = "proto3";
package testing;

import "google/protobuf/timestamp.proto";

message Person {
  string first_name = 1;
  string last_name = 2;
  string full_name = 3;
  int32 age = 4;
  int32 id = 5; // Unique ID number for this person.
  string email = 6;

  google.protobuf.Timestamp last_updated = 7;
}
` + "```" + `

And a stream of JSON documents of the form:

` + "```json" + `
{
	"firstName": "caleb",
	"lastName": "quaye",
	"email": "caleb@myspace.com"
}
` + "```" + `

We can convert the documents into protobuf messages with the following config:`,
				Config: `
pipeline:
  processors:
    - protobuf:
        operator: from_json
        message: testing.Person
        import_paths: [ testing/schema ]
`,
			},
			{
				Title: "Protobuf to JSON",
				Summary: `
If we have the following protobuf definition within a directory called ` + "`testing/schema`" + `:

` + "```protobuf" + `
syntax = "proto3";
package testing;

import "google/protobuf/timestamp.proto";

message Person {
  string first_name = 1;
  string last_name = 2;
  string full_name = 3;
  int32 age = 4;
  int32 id = 5; // Unique ID number for this person.
  string email = 6;

  google.protobuf.Timestamp last_updated = 7;
}
` + "```" + `

And a stream of protobuf messages of the type ` + "`Person`" + `, we could convert them into JSON documents of the format:

` + "```json" + `
{
	"firstName": "caleb",
	"lastName": "quaye",
	"email": "caleb@myspace.com"
}
` + "```" + `

With the following config:`,
				Config: `
pipeline:
  processors:
    - protobuf:
        operator: to_json
        message: testing.Person
        import_paths: [ testing/schema ]
`,
			},
		},
	}
}

//------------------------------------------------------------------------------

// ProtobufConfig contains configuration fields for the Protobuf processor.
161 type ProtobufConfig struct { 162 Parts []int `json:"parts" yaml:"parts"` 163 Operator string `json:"operator" yaml:"operator"` 164 Message string `json:"message" yaml:"message"` 165 ImportPaths []string `json:"import_paths" yaml:"import_paths"` 166 ImportPath string `json:"import_path" yaml:"import_path"` 167 } 168 169 // NewProtobufConfig returns a ProtobufConfig with default values. 170 func NewProtobufConfig() ProtobufConfig { 171 return ProtobufConfig{ 172 Parts: []int{}, 173 Operator: "to_json", 174 Message: "", 175 ImportPaths: []string{}, 176 ImportPath: "", 177 } 178 } 179 180 //------------------------------------------------------------------------------ 181 182 type protobufOperator func(part types.Part) error 183 184 func newProtobufToJSONOperator(message string, importPaths []string) (protobufOperator, error) { 185 if message == "" { 186 return nil, errors.New("message field must not be empty") 187 } 188 189 descriptors, err := loadDescriptors(importPaths) 190 if err != nil { 191 return nil, err 192 } 193 194 m := getMessageFromDescriptors(message, descriptors) 195 if m == nil { 196 return nil, fmt.Errorf("unable to find message '%v' definition within '%v'", message, importPaths) 197 } 198 199 marshaller := &jsonpb.Marshaler{ 200 AnyResolver: dynamic.AnyResolver(dynamic.NewMessageFactoryWithDefaults(), descriptors...), 201 } 202 203 return func(part types.Part) error { 204 msg := dynamic.NewMessage(m) 205 if err := proto.Unmarshal(part.Get(), msg); err != nil { 206 return fmt.Errorf("failed to unmarshal message: %w", err) 207 } 208 209 data, err := msg.MarshalJSONPB(marshaller) 210 if err != nil { 211 return fmt.Errorf("failed to marshal protobuf message: %w", err) 212 } 213 214 part.Set(data) 215 return nil 216 }, nil 217 } 218 219 func newProtobufFromJSONOperator(message string, importPaths []string) (protobufOperator, error) { 220 if message == "" { 221 return nil, errors.New("message field must not be empty") 222 } 223 224 descriptors, err := 
loadDescriptors(importPaths) 225 if err != nil { 226 return nil, err 227 } 228 229 m := getMessageFromDescriptors(message, descriptors) 230 if m == nil { 231 return nil, fmt.Errorf("unable to find message '%v' definition within '%v'", message, importPaths) 232 } 233 234 unmarshaler := &jsonpb.Unmarshaler{ 235 AnyResolver: dynamic.AnyResolver(dynamic.NewMessageFactoryWithDefaults(), descriptors...), 236 } 237 238 return func(part types.Part) error { 239 msg := dynamic.NewMessage(m) 240 if err := msg.UnmarshalJSONPB(unmarshaler, part.Get()); err != nil { 241 return fmt.Errorf("failed to unmarshal JSON message: %w", err) 242 } 243 244 data, err := msg.Marshal() 245 if err != nil { 246 return fmt.Errorf("failed to marshal protobuf message: %v", err) 247 } 248 249 part.Set(data) 250 return nil 251 }, nil 252 } 253 254 func strToProtobufOperator(opStr, message string, importPaths []string) (protobufOperator, error) { 255 switch opStr { 256 case "to_json": 257 return newProtobufToJSONOperator(message, importPaths) 258 case "from_json": 259 return newProtobufFromJSONOperator(message, importPaths) 260 } 261 return nil, fmt.Errorf("operator not recognised: %v", opStr) 262 } 263 264 func loadDescriptors(importPaths []string) ([]*desc.FileDescriptor, error) { 265 var parser protoparse.Parser 266 if len(importPaths) == 0 { 267 importPaths = []string{"."} 268 } else { 269 parser.ImportPaths = importPaths 270 } 271 272 var files []string 273 for _, importPath := range importPaths { 274 if err := filepath.Walk(importPath, func(path string, info os.FileInfo, ferr error) error { 275 if ferr != nil || info.IsDir() { 276 return ferr 277 } 278 if filepath.Ext(info.Name()) == ".proto" { 279 rPath, ferr := filepath.Rel(importPath, path) 280 if ferr != nil { 281 return fmt.Errorf("failed to get relative path: %v", ferr) 282 } 283 files = append(files, rPath) 284 } 285 return nil 286 }); err != nil { 287 return nil, err 288 } 289 } 290 291 fds, err := parser.ParseFiles(files...) 
292 if err != nil { 293 return nil, fmt.Errorf("failed to parse .proto file: %v", err) 294 } 295 if len(fds) == 0 { 296 return nil, fmt.Errorf("no .proto files were found in the paths '%v'", importPaths) 297 } 298 299 return fds, err 300 } 301 302 func getMessageFromDescriptors(message string, fds []*desc.FileDescriptor) *desc.MessageDescriptor { 303 var msg *desc.MessageDescriptor 304 for _, fd := range fds { 305 msg = fd.FindMessage(message) 306 if msg != nil { 307 break 308 } 309 } 310 return msg 311 } 312 313 //------------------------------------------------------------------------------ 314 315 // Protobuf is a processor that performs an operation on an Protobuf payload. 316 type Protobuf struct { 317 parts []int 318 operator protobufOperator 319 320 conf Config 321 log log.Modular 322 stats metrics.Type 323 324 mCount metrics.StatCounter 325 mErr metrics.StatCounter 326 mSent metrics.StatCounter 327 mBatchSent metrics.StatCounter 328 } 329 330 // NewProtobuf returns an Protobuf processor. 
331 func NewProtobuf( 332 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 333 ) (Type, error) { 334 p := &Protobuf{ 335 parts: conf.Protobuf.Parts, 336 conf: conf, 337 log: log, 338 stats: stats, 339 340 mCount: stats.GetCounter("count"), 341 mErr: stats.GetCounter("error"), 342 mSent: stats.GetCounter("sent"), 343 mBatchSent: stats.GetCounter("batch.sent"), 344 } 345 346 importPaths := conf.Protobuf.ImportPaths 347 if len(conf.Protobuf.ImportPath) > 0 { 348 importPaths = append(importPaths, conf.Protobuf.ImportPath) 349 } 350 351 var err error 352 if p.operator, err = strToProtobufOperator(conf.Protobuf.Operator, conf.Protobuf.Message, importPaths); err != nil { 353 return nil, err 354 } 355 return p, nil 356 } 357 358 //------------------------------------------------------------------------------ 359 360 // ProcessMessage applies the processor to a message, either creating >0 361 // resulting messages or a response to be sent back to the message source. 362 func (p *Protobuf) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 363 p.mCount.Incr(1) 364 newMsg := msg.Copy() 365 366 proc := func(index int, span *tracing.Span, part types.Part) error { 367 if err := p.operator(part); err != nil { 368 p.mErr.Incr(1) 369 p.log.Debugf("Operator failed: %v\n", err) 370 return err 371 } 372 return nil 373 } 374 375 IteratePartsWithSpanV2(TypeProtobuf, p.parts, newMsg, proc) 376 377 p.mBatchSent.Incr(1) 378 p.mSent.Incr(int64(newMsg.Len())) 379 return []types.Message{newMsg}, nil 380 } 381 382 // CloseAsync shuts down the processor and stops processing requests. 383 func (p *Protobuf) CloseAsync() { 384 } 385 386 // WaitForClose blocks until the processor has closed down. 387 func (p *Protobuf) WaitForClose(timeout time.Duration) error { 388 return nil 389 } 390 391 //------------------------------------------------------------------------------