github.com/Jeffail/benthos/v3@v3.65.0/lib/input/reader/hdfs.go (about) 1 package reader 2 3 import ( 4 "context" 5 "path/filepath" 6 "time" 7 8 "github.com/Jeffail/benthos/v3/lib/log" 9 "github.com/Jeffail/benthos/v3/lib/message" 10 "github.com/Jeffail/benthos/v3/lib/metrics" 11 "github.com/Jeffail/benthos/v3/lib/types" 12 "github.com/colinmarc/hdfs" 13 ) 14 15 //------------------------------------------------------------------------------ 16 17 // HDFSConfig contains configuration fields for the HDFS input type. 18 type HDFSConfig struct { 19 Hosts []string `json:"hosts" yaml:"hosts"` 20 User string `json:"user" yaml:"user"` 21 Directory string `json:"directory" yaml:"directory"` 22 } 23 24 // NewHDFSConfig creates a new Config with default values. 25 func NewHDFSConfig() HDFSConfig { 26 return HDFSConfig{ 27 Hosts: []string{"localhost:9000"}, 28 User: "benthos_hdfs", 29 Directory: "", 30 } 31 } 32 33 //------------------------------------------------------------------------------ 34 35 // HDFS is a benthos reader.Type implementation that reads messages from a 36 // HDFS directory. 37 type HDFS struct { 38 conf HDFSConfig 39 40 targets []string 41 42 client *hdfs.Client 43 44 log log.Modular 45 stats metrics.Type 46 } 47 48 // NewHDFS creates a new HDFS writer.Type. 49 func NewHDFS( 50 conf HDFSConfig, 51 log log.Modular, 52 stats metrics.Type, 53 ) *HDFS { 54 return &HDFS{ 55 conf: conf, 56 log: log, 57 stats: stats, 58 } 59 } 60 61 //------------------------------------------------------------------------------ 62 63 // Connect attempts to establish a connection to the target HDFS host. 64 func (h *HDFS) Connect() error { 65 return h.ConnectWithContext(context.Background()) 66 } 67 68 // ConnectWithContext attempts to establish a connection to the target HDFS 69 // host. 70 func (h *HDFS) ConnectWithContext(ctx context.Context) error { 71 if h.client != nil { 72 return nil 73 } 74 75 client, err := hdfs.NewClient(hdfs.ClientOptions{ 76 Addresses: h.conf.Hosts, 77 User: h.conf.User, 78 }) 79 if err != nil { 80 return err 81 } 82 83 h.client = client 84 targets, err := client.ReadDir(h.conf.Directory) 85 if err != nil { 86 return err 87 } 88 89 for _, info := range targets { 90 if !info.IsDir() { 91 h.targets = append(h.targets, info.Name()) 92 } 93 } 94 95 h.log.Infof("Receiving files from HDFS directory: %v\n", h.conf.Directory) 96 return nil 97 } 98 99 //------------------------------------------------------------------------------ 100 101 // ReadWithContext reads a new HDFS message. 102 func (h *HDFS) ReadWithContext(ctx context.Context) (types.Message, AsyncAckFn, error) { 103 msg, err := h.Read() 104 if err != nil { 105 return nil, nil, err 106 } 107 return msg, noopAsyncAckFn, nil 108 } 109 110 // Read a new HDFS message. 111 func (h *HDFS) Read() (types.Message, error) { 112 if len(h.targets) == 0 { 113 return nil, types.ErrTypeClosed 114 } 115 116 fileName := h.targets[0] 117 h.targets = h.targets[1:] 118 119 filePath := filepath.Join(h.conf.Directory, fileName) 120 msgBytes, readerr := h.client.ReadFile(filePath) 121 if readerr != nil { 122 return nil, readerr 123 } 124 125 msg := message.New([][]byte{msgBytes}) 126 msg.Get(0).Metadata().Set("hdfs_name", fileName) 127 msg.Get(0).Metadata().Set("hdfs_path", filePath) 128 return msg, nil 129 } 130 131 // Acknowledge instructs whether unacknowledged messages have been successfully 132 // propagated. 133 func (h *HDFS) Acknowledge(err error) error { 134 return nil 135 } 136 137 // CloseAsync shuts down the HDFS input and stops processing requests. 138 func (h *HDFS) CloseAsync() { 139 } 140 141 // WaitForClose blocks until the HDFS input has closed down. 142 func (h *HDFS) WaitForClose(timeout time.Duration) error { 143 return nil 144 } 145 146 //------------------------------------------------------------------------------