github.com/argoproj/argo-events@v1.9.1/eventsources/sources/hdfs/start.go (about)

     1  package hdfs
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  	"regexp"
    10  	"strings"
    11  	"time"
    12  
    13  	"github.com/colinmarc/hdfs"
    14  	"go.uber.org/zap"
    15  
    16  	"github.com/argoproj/argo-events/common/logging"
    17  	eventsourcecommon "github.com/argoproj/argo-events/eventsources/common"
    18  	"github.com/argoproj/argo-events/eventsources/common/fsevent"
    19  	"github.com/argoproj/argo-events/eventsources/common/naivewatcher"
    20  	"github.com/argoproj/argo-events/eventsources/sources"
    21  	metrics "github.com/argoproj/argo-events/metrics"
    22  	apicommon "github.com/argoproj/argo-events/pkg/apis/common"
    23  	"github.com/argoproj/argo-events/pkg/apis/eventsource/v1alpha1"
    24  )
    25  
// EventListener implements Eventing for HDFS events.
//
// EventSourceName and EventName are the values reported by GetEventSourceName
// and GetEventName; they are also used to label metrics and error messages.
// HDFSEventSource carries the user-supplied configuration that StartListening
// reads (Directory, Path, PathRegexp, Type, CheckInterval, Metadata).
// Metrics receives the processing-duration and processing-failure
// observations recorded while events are handled.
type EventListener struct {
	EventSourceName string
	EventName       string
	HDFSEventSource v1alpha1.HDFSEventSource
	Metrics         *metrics.Metrics
}
    33  
// GetEventSourceName returns the name of the event source, as stored in the
// EventSourceName field.
func (el *EventListener) GetEventSourceName() string {
	return el.EventSourceName
}
    38  
// GetEventName returns the name of the event, as stored in the EventName field.
func (el *EventListener) GetEventName() string {
	return el.EventName
}
    43  
// GetEventSourceType returns the type of this event source, which is always
// apicommon.HDFSEvent.
func (el *EventListener) GetEventSourceType() apicommon.EventSourceType {
	return apicommon.HDFSEvent
}
    48  
// WatchableHDFS wraps an *hdfs.Client so that it can be handed to
// naivewatcher.NewWatcher. It exposes the Walk and GetFileID methods on top of
// the wrapped client.
type WatchableHDFS struct {
	hdfscli *hdfs.Client
}
    53  
// Walk walks the directory tree rooted at root by delegating to the wrapped
// HDFS client's Walk. root and walkFn are passed through unchanged and the
// client's error is returned as is.
func (w *WatchableHDFS) Walk(root string, walkFn filepath.WalkFunc) error {
	return w.hdfscli.Walk(root, walkFn)
}
    58  
// GetFileID returns the identifier used for fi. For now this is simply the
// value of fi.Name().
func (w *WatchableHDFS) GetFileID(fi os.FileInfo) interface{} {
	// FIXME: Use HDFS File ID once it's exposed
	//   https://github.com/colinmarc/hdfs/pull/171
	// The intended replacement is:
	//   return fi.Sys().(*hadoop_hdfs.HdfsFileStatusProto).GetFileID()
	//
	// NOTE(review): Name() is presumably only the base name, so equally named
	// files in different directories may share an ID — confirm against how
	// naivewatcher uses this value.
	return fi.Name()
}
    66  
    67  // StartListening starts listening events
    68  func (el *EventListener) StartListening(ctx context.Context, dispatch func([]byte, ...eventsourcecommon.Option) error) error {
    69  	log := logging.FromContext(ctx).
    70  		With(logging.LabelEventSourceType, el.GetEventSourceType(), logging.LabelEventName, el.GetEventName())
    71  	log.Info("started processing the Emitter event source...")
    72  	defer sources.Recover(el.GetEventName())
    73  
    74  	hdfsEventSource := &el.HDFSEventSource
    75  
    76  	log.Info("setting up HDFS configuration...")
    77  	hdfsConfig, err := createHDFSConfig(hdfsEventSource)
    78  	if err != nil {
    79  		return fmt.Errorf("failed to create HDFS configuration for %s, %w", el.GetEventName(), err)
    80  	}
    81  
    82  	log.Info("setting up HDFS client...")
    83  	hdfscli, err := createHDFSClient(hdfsConfig.Addresses, hdfsConfig.HDFSUser, hdfsConfig.KrbOptions)
    84  	if err != nil {
    85  		return fmt.Errorf("failed to create the HDFS client for %s, %w", el.GetEventName(), err)
    86  	}
    87  	defer hdfscli.Close()
    88  
    89  	log.Info("setting up a new watcher...")
    90  	watcher, err := naivewatcher.NewWatcher(&WatchableHDFS{hdfscli: hdfscli})
    91  	if err != nil {
    92  		return fmt.Errorf("failed to create the HDFS watcher for %s, %w", el.GetEventName(), err)
    93  	}
    94  	defer watcher.Close()
    95  
    96  	intervalDuration := 1 * time.Minute
    97  	if hdfsEventSource.CheckInterval != "" {
    98  		d, err := time.ParseDuration(hdfsEventSource.CheckInterval)
    99  		if err != nil {
   100  			return fmt.Errorf("failed to parse the check in interval for %s, %w", el.GetEventName(), err)
   101  		}
   102  		intervalDuration = d
   103  	}
   104  
   105  	log.Info("started HDFS watcher")
   106  	err = watcher.Start(intervalDuration)
   107  	if err != nil {
   108  		return fmt.Errorf("failed to start the watcher for %s, %w", el.GetEventName(), err)
   109  	}
   110  
   111  	// directory to watch must be available in HDFS. You can't watch a directory that is not present.
   112  	log.Info("adding configured directory to watcher...")
   113  	err = watcher.Add(hdfsEventSource.Directory)
   114  	if err != nil {
   115  		return fmt.Errorf("failed to add directory %s for %s, %w", hdfsEventSource.Directory, el.GetEventName(), err)
   116  	}
   117  
   118  	op := fsevent.NewOp(hdfsEventSource.Type)
   119  	var pathRegexp *regexp.Regexp
   120  	if hdfsEventSource.PathRegexp != "" {
   121  		pathRegexp, err = regexp.Compile(hdfsEventSource.PathRegexp)
   122  		if err != nil {
   123  			return fmt.Errorf("failed to compile the path regex %s for %s, %w", hdfsEventSource.PathRegexp, el.GetEventName(), err)
   124  		}
   125  	}
   126  
   127  	log.Info("listening to HDFS notifications...")
   128  	for {
   129  		select {
   130  		case event, ok := <-watcher.Events:
   131  			if !ok {
   132  				log.Info("HDFS watcher has stopped")
   133  				// watcher stopped watching file events
   134  				return fmt.Errorf("watcher has been stopped for %s", el.GetEventName())
   135  			}
   136  			event.Metadata = hdfsEventSource.Metadata
   137  			matched := false
   138  			relPath := strings.TrimPrefix(event.Name, hdfsEventSource.Directory)
   139  
   140  			if hdfsEventSource.Path != "" && hdfsEventSource.Path == relPath {
   141  				matched = true
   142  			} else if pathRegexp != nil && pathRegexp.MatchString(relPath) {
   143  				matched = true
   144  			}
   145  
   146  			if matched && (op&event.Op != 0) {
   147  				if err := el.handleOne(event, dispatch, log); err != nil {
   148  					log.Errorw("failed to process an HDFS event", zap.Error(err))
   149  					el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName())
   150  				}
   151  			}
   152  		case err := <-watcher.Errors:
   153  			return fmt.Errorf("failed to watch events for %s, %w", el.GetEventName(), err)
   154  		case <-ctx.Done():
   155  			return nil
   156  		}
   157  	}
   158  }
   159  
   160  func (el *EventListener) handleOne(event fsevent.Event, dispatch func([]byte, ...eventsourcecommon.Option) error, log *zap.SugaredLogger) error {
   161  	defer func(start time.Time) {
   162  		el.Metrics.EventProcessingDuration(el.GetEventSourceName(), el.GetEventName(), float64(time.Since(start)/time.Millisecond))
   163  	}(time.Now())
   164  
   165  	logger := log.With(
   166  		"event-type", event.Op.String(),
   167  		"descriptor-name", event.Name,
   168  	)
   169  	logger.Info("received an event")
   170  
   171  	payload, err := json.Marshal(event)
   172  	if err != nil {
   173  		return fmt.Errorf("failed to marshal the event data, rejecting event, %w", err)
   174  	}
   175  
   176  	logger.Info("dispatching event on data channel...")
   177  	if err = dispatch(payload); err != nil {
   178  		return fmt.Errorf("failed to dispatch an HDFS event, %w", err)
   179  	}
   180  	return nil
   181  }