github.com/Jeffail/benthos/v3@v3.65.0/lib/input/reader/hdfs.go (about)

     1  package reader
     2  
     3  import (
     4  	"context"
     5  	"path/filepath"
     6  	"time"
     7  
     8  	"github.com/Jeffail/benthos/v3/lib/log"
     9  	"github.com/Jeffail/benthos/v3/lib/message"
    10  	"github.com/Jeffail/benthos/v3/lib/metrics"
    11  	"github.com/Jeffail/benthos/v3/lib/types"
    12  	"github.com/colinmarc/hdfs"
    13  )
    14  
    15  //------------------------------------------------------------------------------
    16  
    17  // HDFSConfig contains configuration fields for the HDFS input type.
    18  type HDFSConfig struct {
    19  	Hosts     []string `json:"hosts" yaml:"hosts"`
    20  	User      string   `json:"user" yaml:"user"`
    21  	Directory string   `json:"directory" yaml:"directory"`
    22  }
    23  
    24  // NewHDFSConfig creates a new Config with default values.
    25  func NewHDFSConfig() HDFSConfig {
    26  	return HDFSConfig{
    27  		Hosts:     []string{"localhost:9000"},
    28  		User:      "benthos_hdfs",
    29  		Directory: "",
    30  	}
    31  }
    32  
    33  //------------------------------------------------------------------------------
    34  
    35  // HDFS is a benthos reader.Type implementation that reads messages from a
    36  // HDFS directory.
    37  type HDFS struct {
    38  	conf HDFSConfig
    39  
    40  	targets []string
    41  
    42  	client *hdfs.Client
    43  
    44  	log   log.Modular
    45  	stats metrics.Type
    46  }
    47  
    48  // NewHDFS creates a new HDFS writer.Type.
    49  func NewHDFS(
    50  	conf HDFSConfig,
    51  	log log.Modular,
    52  	stats metrics.Type,
    53  ) *HDFS {
    54  	return &HDFS{
    55  		conf:  conf,
    56  		log:   log,
    57  		stats: stats,
    58  	}
    59  }
    60  
    61  //------------------------------------------------------------------------------
    62  
    63  // Connect attempts to establish a connection to the target HDFS host.
    64  func (h *HDFS) Connect() error {
    65  	return h.ConnectWithContext(context.Background())
    66  }
    67  
    68  // ConnectWithContext attempts to establish a connection to the target HDFS
    69  // host.
    70  func (h *HDFS) ConnectWithContext(ctx context.Context) error {
    71  	if h.client != nil {
    72  		return nil
    73  	}
    74  
    75  	client, err := hdfs.NewClient(hdfs.ClientOptions{
    76  		Addresses: h.conf.Hosts,
    77  		User:      h.conf.User,
    78  	})
    79  	if err != nil {
    80  		return err
    81  	}
    82  
    83  	h.client = client
    84  	targets, err := client.ReadDir(h.conf.Directory)
    85  	if err != nil {
    86  		return err
    87  	}
    88  
    89  	for _, info := range targets {
    90  		if !info.IsDir() {
    91  			h.targets = append(h.targets, info.Name())
    92  		}
    93  	}
    94  
    95  	h.log.Infof("Receiving files from HDFS directory: %v\n", h.conf.Directory)
    96  	return nil
    97  }
    98  
    99  //------------------------------------------------------------------------------
   100  
   101  // ReadWithContext reads a new HDFS message.
   102  func (h *HDFS) ReadWithContext(ctx context.Context) (types.Message, AsyncAckFn, error) {
   103  	msg, err := h.Read()
   104  	if err != nil {
   105  		return nil, nil, err
   106  	}
   107  	return msg, noopAsyncAckFn, nil
   108  }
   109  
   110  // Read a new HDFS message.
   111  func (h *HDFS) Read() (types.Message, error) {
   112  	if len(h.targets) == 0 {
   113  		return nil, types.ErrTypeClosed
   114  	}
   115  
   116  	fileName := h.targets[0]
   117  	h.targets = h.targets[1:]
   118  
   119  	filePath := filepath.Join(h.conf.Directory, fileName)
   120  	msgBytes, readerr := h.client.ReadFile(filePath)
   121  	if readerr != nil {
   122  		return nil, readerr
   123  	}
   124  
   125  	msg := message.New([][]byte{msgBytes})
   126  	msg.Get(0).Metadata().Set("hdfs_name", fileName)
   127  	msg.Get(0).Metadata().Set("hdfs_path", filePath)
   128  	return msg, nil
   129  }
   130  
   131  // Acknowledge instructs whether unacknowledged messages have been successfully
   132  // propagated.
   133  func (h *HDFS) Acknowledge(err error) error {
   134  	return nil
   135  }
   136  
   137  // CloseAsync shuts down the HDFS input and stops processing requests.
   138  func (h *HDFS) CloseAsync() {
   139  }
   140  
   141  // WaitForClose blocks until the HDFS input has closed down.
   142  func (h *HDFS) WaitForClose(timeout time.Duration) error {
   143  	return nil
   144  }
   145  
   146  //------------------------------------------------------------------------------