github.com/Jeffail/benthos/v3@v3.65.0/lib/output/writer/hdfs.go (about)

     1  package writer
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"path/filepath"
     8  	"time"
     9  
    10  	"github.com/Jeffail/benthos/v3/internal/bloblang/field"
    11  	"github.com/Jeffail/benthos/v3/internal/interop"
    12  	"github.com/Jeffail/benthos/v3/lib/log"
    13  	"github.com/Jeffail/benthos/v3/lib/message/batch"
    14  	"github.com/Jeffail/benthos/v3/lib/metrics"
    15  	"github.com/Jeffail/benthos/v3/lib/types"
    16  	"github.com/colinmarc/hdfs"
    17  )
    18  
    19  //------------------------------------------------------------------------------
    20  
    21  // HDFSConfig contains configuration fields for the HDFS output type.
    22  type HDFSConfig struct {
    23  	Hosts       []string           `json:"hosts" yaml:"hosts"`
    24  	User        string             `json:"user" yaml:"user"`
    25  	Directory   string             `json:"directory" yaml:"directory"`
    26  	Path        string             `json:"path" yaml:"path"`
    27  	MaxInFlight int                `json:"max_in_flight" yaml:"max_in_flight"`
    28  	Batching    batch.PolicyConfig `json:"batching" yaml:"batching"`
    29  }
    30  
    31  // NewHDFSConfig creates a new Config with default values.
    32  func NewHDFSConfig() HDFSConfig {
    33  	return HDFSConfig{
    34  		Hosts:       []string{"localhost:9000"},
    35  		User:        "benthos_hdfs",
    36  		Directory:   "",
    37  		Path:        `${!count("files")}-${!timestamp_unix_nano()}.txt`,
    38  		MaxInFlight: 1,
    39  		Batching:    batch.NewPolicyConfig(),
    40  	}
    41  }
    42  
    43  //------------------------------------------------------------------------------
    44  
    45  // HDFS is a benthos writer.Type implementation that writes messages to a
    46  // HDFS directory.
    47  type HDFS struct {
    48  	conf HDFSConfig
    49  
    50  	path *field.Expression
    51  
    52  	client *hdfs.Client
    53  
    54  	log   log.Modular
    55  	stats metrics.Type
    56  }
    57  
    58  // NewHDFS creates a new HDFS writer.Type.
    59  //
    60  // Deprecated: use the V2 API instead.
    61  func NewHDFS(
    62  	conf HDFSConfig,
    63  	log log.Modular,
    64  	stats metrics.Type,
    65  ) (*HDFS, error) {
    66  	return NewHDFSV2(conf, types.NoopMgr(), log, stats)
    67  }
    68  
    69  // NewHDFSV2 creates a new HDFS writer.Type.
    70  func NewHDFSV2(
    71  	conf HDFSConfig,
    72  	mgr types.Manager,
    73  	log log.Modular,
    74  	stats metrics.Type,
    75  ) (*HDFS, error) {
    76  	path, err := interop.NewBloblangField(mgr, conf.Path)
    77  	if err != nil {
    78  		return nil, fmt.Errorf("failed to parse path expression: %v", err)
    79  	}
    80  	return &HDFS{
    81  		conf:  conf,
    82  		path:  path,
    83  		log:   log,
    84  		stats: stats,
    85  	}, nil
    86  }
    87  
    88  // ConnectWithContext attempts to establish a connection to the target HDFS
    89  // host.
    90  func (h *HDFS) ConnectWithContext(ctx context.Context) error {
    91  	return h.Connect()
    92  }
    93  
    94  // Connect attempts to establish a connection to the target HDFS host.
    95  func (h *HDFS) Connect() error {
    96  	if h.client != nil {
    97  		return nil
    98  	}
    99  
   100  	client, err := hdfs.NewClient(hdfs.ClientOptions{
   101  		Addresses: h.conf.Hosts,
   102  		User:      h.conf.User,
   103  	})
   104  	if err != nil {
   105  		return err
   106  	}
   107  
   108  	h.client = client
   109  
   110  	h.log.Infof("Writing message parts as files to HDFS directory: %v\n", h.conf.Directory)
   111  	return nil
   112  }
   113  
   114  // WriteWithContext attempts to write message contents to a target HDFS
   115  // directory as files.
   116  func (h *HDFS) WriteWithContext(ctx context.Context, msg types.Message) error {
   117  	return h.Write(msg)
   118  }
   119  
   120  // Write attempts to write message contents to a target HDFS directory as files.
   121  func (h *HDFS) Write(msg types.Message) error {
   122  	if h.client == nil {
   123  		return types.ErrNotConnected
   124  	}
   125  
   126  	return IterateBatchedSend(msg, func(i int, p types.Part) error {
   127  		path := h.path.String(i, msg)
   128  		filePath := filepath.Join(h.conf.Directory, path)
   129  
   130  		err := h.client.MkdirAll(h.conf.Directory, os.ModeDir|0o644)
   131  		if err != nil {
   132  			return err
   133  		}
   134  
   135  		fw, err := h.client.Create(filePath)
   136  		if err != nil {
   137  			return err
   138  		}
   139  
   140  		if _, err := fw.Write(p.Get()); err != nil {
   141  			return err
   142  		}
   143  		fw.Close()
   144  		return nil
   145  	})
   146  }
   147  
   148  // CloseAsync begins cleaning up resources used by this reader asynchronously.
   149  func (h *HDFS) CloseAsync() {
   150  }
   151  
   152  // WaitForClose will block until either the reader is closed or a specified
   153  // timeout occurs.
   154  func (h *HDFS) WaitForClose(time.Duration) error {
   155  	return nil
   156  }
   157  
   158  //------------------------------------------------------------------------------