github.com/argoproj/argo-events@v1.9.1/eventsources/sources/hdfs/start.go (about) 1 package hdfs 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "os" 8 "path/filepath" 9 "regexp" 10 "strings" 11 "time" 12 13 "github.com/colinmarc/hdfs" 14 "go.uber.org/zap" 15 16 "github.com/argoproj/argo-events/common/logging" 17 eventsourcecommon "github.com/argoproj/argo-events/eventsources/common" 18 "github.com/argoproj/argo-events/eventsources/common/fsevent" 19 "github.com/argoproj/argo-events/eventsources/common/naivewatcher" 20 "github.com/argoproj/argo-events/eventsources/sources" 21 metrics "github.com/argoproj/argo-events/metrics" 22 apicommon "github.com/argoproj/argo-events/pkg/apis/common" 23 "github.com/argoproj/argo-events/pkg/apis/eventsource/v1alpha1" 24 ) 25 26 // EventListener implements Eventing for HDFS events 27 type EventListener struct { 28 EventSourceName string 29 EventName string 30 HDFSEventSource v1alpha1.HDFSEventSource 31 Metrics *metrics.Metrics 32 } 33 34 // GetEventSourceName returns name of event source 35 func (el *EventListener) GetEventSourceName() string { 36 return el.EventSourceName 37 } 38 39 // GetEventName returns name of event 40 func (el *EventListener) GetEventName() string { 41 return el.EventName 42 } 43 44 // GetEventSourceType return type of event server 45 func (el *EventListener) GetEventSourceType() apicommon.EventSourceType { 46 return apicommon.HDFSEvent 47 } 48 49 // WatchableHDFS wraps hdfs.Client for naivewatcher 50 type WatchableHDFS struct { 51 hdfscli *hdfs.Client 52 } 53 54 // Walk walks a directory 55 func (w *WatchableHDFS) Walk(root string, walkFn filepath.WalkFunc) error { 56 return w.hdfscli.Walk(root, walkFn) 57 } 58 59 // GetFileID returns the file ID 60 func (w *WatchableHDFS) GetFileID(fi os.FileInfo) interface{} { 61 return fi.Name() 62 // FIXME: Use HDFS File ID once it's exposed 63 // https://github.com/colinmarc/hdfs/pull/171 64 // return fi.Sys().(*hadoop_hdfs.HdfsFileStatusProto).GetFileID() 65 } 66 67 // StartListening starts listening events 68 func (el *EventListener) StartListening(ctx context.Context, dispatch func([]byte, ...eventsourcecommon.Option) error) error { 69 log := logging.FromContext(ctx). 70 With(logging.LabelEventSourceType, el.GetEventSourceType(), logging.LabelEventName, el.GetEventName()) 71 log.Info("started processing the Emitter event source...") 72 defer sources.Recover(el.GetEventName()) 73 74 hdfsEventSource := &el.HDFSEventSource 75 76 log.Info("setting up HDFS configuration...") 77 hdfsConfig, err := createHDFSConfig(hdfsEventSource) 78 if err != nil { 79 return fmt.Errorf("failed to create HDFS configuration for %s, %w", el.GetEventName(), err) 80 } 81 82 log.Info("setting up HDFS client...") 83 hdfscli, err := createHDFSClient(hdfsConfig.Addresses, hdfsConfig.HDFSUser, hdfsConfig.KrbOptions) 84 if err != nil { 85 return fmt.Errorf("failed to create the HDFS client for %s, %w", el.GetEventName(), err) 86 } 87 defer hdfscli.Close() 88 89 log.Info("setting up a new watcher...") 90 watcher, err := naivewatcher.NewWatcher(&WatchableHDFS{hdfscli: hdfscli}) 91 if err != nil { 92 return fmt.Errorf("failed to create the HDFS watcher for %s, %w", el.GetEventName(), err) 93 } 94 defer watcher.Close() 95 96 intervalDuration := 1 * time.Minute 97 if hdfsEventSource.CheckInterval != "" { 98 d, err := time.ParseDuration(hdfsEventSource.CheckInterval) 99 if err != nil { 100 return fmt.Errorf("failed to parse the check in interval for %s, %w", el.GetEventName(), err) 101 } 102 intervalDuration = d 103 } 104 105 log.Info("started HDFS watcher") 106 err = watcher.Start(intervalDuration) 107 if err != nil { 108 return fmt.Errorf("failed to start the watcher for %s, %w", el.GetEventName(), err) 109 } 110 111 // directory to watch must be available in HDFS. You can't watch a directory that is not present. 112 log.Info("adding configured directory to watcher...") 113 err = watcher.Add(hdfsEventSource.Directory) 114 if err != nil { 115 return fmt.Errorf("failed to add directory %s for %s, %w", hdfsEventSource.Directory, el.GetEventName(), err) 116 } 117 118 op := fsevent.NewOp(hdfsEventSource.Type) 119 var pathRegexp *regexp.Regexp 120 if hdfsEventSource.PathRegexp != "" { 121 pathRegexp, err = regexp.Compile(hdfsEventSource.PathRegexp) 122 if err != nil { 123 return fmt.Errorf("failed to compile the path regex %s for %s, %w", hdfsEventSource.PathRegexp, el.GetEventName(), err) 124 } 125 } 126 127 log.Info("listening to HDFS notifications...") 128 for { 129 select { 130 case event, ok := <-watcher.Events: 131 if !ok { 132 log.Info("HDFS watcher has stopped") 133 // watcher stopped watching file events 134 return fmt.Errorf("watcher has been stopped for %s", el.GetEventName()) 135 } 136 event.Metadata = hdfsEventSource.Metadata 137 matched := false 138 relPath := strings.TrimPrefix(event.Name, hdfsEventSource.Directory) 139 140 if hdfsEventSource.Path != "" && hdfsEventSource.Path == relPath { 141 matched = true 142 } else if pathRegexp != nil && pathRegexp.MatchString(relPath) { 143 matched = true 144 } 145 146 if matched && (op&event.Op != 0) { 147 if err := el.handleOne(event, dispatch, log); err != nil { 148 log.Errorw("failed to process an HDFS event", zap.Error(err)) 149 el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName()) 150 } 151 } 152 case err := <-watcher.Errors: 153 return fmt.Errorf("failed to watch events for %s, %w", el.GetEventName(), err) 154 case <-ctx.Done(): 155 return nil 156 } 157 } 158 } 159 160 func (el *EventListener) handleOne(event fsevent.Event, dispatch func([]byte, ...eventsourcecommon.Option) error, log *zap.SugaredLogger) error { 161 defer func(start time.Time) { 162 el.Metrics.EventProcessingDuration(el.GetEventSourceName(), el.GetEventName(), float64(time.Since(start)/time.Millisecond)) 163 }(time.Now()) 164 165 logger := log.With( 166 "event-type", event.Op.String(), 167 "descriptor-name", event.Name, 168 ) 169 logger.Info("received an event") 170 171 payload, err := json.Marshal(event) 172 if err != nil { 173 return fmt.Errorf("failed to marshal the event data, rejecting event, %w", err) 174 } 175 176 logger.Info("dispatching event on data channel...") 177 if err = dispatch(payload); err != nil { 178 return fmt.Errorf("failed to dispatch an HDFS event, %w", err) 179 } 180 return nil 181 }