github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/hdfs.go (about) 1 package writer 2 3 import ( 4 "context" 5 "fmt" 6 "os" 7 "path/filepath" 8 "time" 9 10 "github.com/Jeffail/benthos/v3/internal/bloblang/field" 11 "github.com/Jeffail/benthos/v3/internal/interop" 12 "github.com/Jeffail/benthos/v3/lib/log" 13 "github.com/Jeffail/benthos/v3/lib/message/batch" 14 "github.com/Jeffail/benthos/v3/lib/metrics" 15 "github.com/Jeffail/benthos/v3/lib/types" 16 "github.com/colinmarc/hdfs" 17 ) 18 19 //------------------------------------------------------------------------------ 20 21 // HDFSConfig contains configuration fields for the HDFS output type. 22 type HDFSConfig struct { 23 Hosts []string `json:"hosts" yaml:"hosts"` 24 User string `json:"user" yaml:"user"` 25 Directory string `json:"directory" yaml:"directory"` 26 Path string `json:"path" yaml:"path"` 27 MaxInFlight int `json:"max_in_flight" yaml:"max_in_flight"` 28 Batching batch.PolicyConfig `json:"batching" yaml:"batching"` 29 } 30 31 // NewHDFSConfig creates a new Config with default values. 32 func NewHDFSConfig() HDFSConfig { 33 return HDFSConfig{ 34 Hosts: []string{"localhost:9000"}, 35 User: "benthos_hdfs", 36 Directory: "", 37 Path: `${!count("files")}-${!timestamp_unix_nano()}.txt`, 38 MaxInFlight: 1, 39 Batching: batch.NewPolicyConfig(), 40 } 41 } 42 43 //------------------------------------------------------------------------------ 44 45 // HDFS is a benthos writer.Type implementation that writes messages to a 46 // HDFS directory. 47 type HDFS struct { 48 conf HDFSConfig 49 50 path *field.Expression 51 52 client *hdfs.Client 53 54 log log.Modular 55 stats metrics.Type 56 } 57 58 // NewHDFS creates a new HDFS writer.Type. 59 // 60 // Deprecated: use the V2 API instead. 61 func NewHDFS( 62 conf HDFSConfig, 63 log log.Modular, 64 stats metrics.Type, 65 ) (*HDFS, error) { 66 return NewHDFSV2(conf, types.NoopMgr(), log, stats) 67 } 68 69 // NewHDFSV2 creates a new HDFS writer.Type. 70 func NewHDFSV2( 71 conf HDFSConfig, 72 mgr types.Manager, 73 log log.Modular, 74 stats metrics.Type, 75 ) (*HDFS, error) { 76 path, err := interop.NewBloblangField(mgr, conf.Path) 77 if err != nil { 78 return nil, fmt.Errorf("failed to parse path expression: %v", err) 79 } 80 return &HDFS{ 81 conf: conf, 82 path: path, 83 log: log, 84 stats: stats, 85 }, nil 86 } 87 88 // ConnectWithContext attempts to establish a connection to the target HDFS 89 // host. 90 func (h *HDFS) ConnectWithContext(ctx context.Context) error { 91 return h.Connect() 92 } 93 94 // Connect attempts to establish a connection to the target HDFS host. 95 func (h *HDFS) Connect() error { 96 if h.client != nil { 97 return nil 98 } 99 100 client, err := hdfs.NewClient(hdfs.ClientOptions{ 101 Addresses: h.conf.Hosts, 102 User: h.conf.User, 103 }) 104 if err != nil { 105 return err 106 } 107 108 h.client = client 109 110 h.log.Infof("Writing message parts as files to HDFS directory: %v\n", h.conf.Directory) 111 return nil 112 } 113 114 // WriteWithContext attempts to write message contents to a target HDFS 115 // directory as files. 116 func (h *HDFS) WriteWithContext(ctx context.Context, msg types.Message) error { 117 return h.Write(msg) 118 } 119 120 // Write attempts to write message contents to a target HDFS directory as files. 121 func (h *HDFS) Write(msg types.Message) error { 122 if h.client == nil { 123 return types.ErrNotConnected 124 } 125 126 return IterateBatchedSend(msg, func(i int, p types.Part) error { 127 path := h.path.String(i, msg) 128 filePath := filepath.Join(h.conf.Directory, path) 129 130 err := h.client.MkdirAll(h.conf.Directory, os.ModeDir|0o644) 131 if err != nil { 132 return err 133 } 134 135 fw, err := h.client.Create(filePath) 136 if err != nil { 137 return err 138 } 139 140 if _, err := fw.Write(p.Get()); err != nil { 141 return err 142 } 143 fw.Close() 144 return nil 145 }) 146 } 147 148 // CloseAsync begins cleaning up resources used by this reader asynchronously. 149 func (h *HDFS) CloseAsync() { 150 } 151 152 // WaitForClose will block until either the reader is closed or a specified 153 // timeout occurs. 154 func (h *HDFS) WaitForClose(time.Duration) error { 155 return nil 156 } 157 158 //------------------------------------------------------------------------------