github.com/argoproj/argo-events@v1.9.1/eventsources/sources/sftp/start.go (about) 1 /* 2 Copyright 2018 BlackRock, Inc. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sftp 18 19 import ( 20 "context" 21 "encoding/json" 22 "fmt" 23 "io/fs" 24 "os" 25 "regexp" 26 "strings" 27 "time" 28 29 "github.com/fsnotify/fsnotify" 30 "github.com/pkg/sftp" 31 "go.uber.org/zap" 32 "golang.org/x/crypto/ssh" 33 34 "github.com/argoproj/argo-events/common" 35 "github.com/argoproj/argo-events/common/logging" 36 eventsourcecommon "github.com/argoproj/argo-events/eventsources/common" 37 "github.com/argoproj/argo-events/eventsources/common/fsevent" 38 "github.com/argoproj/argo-events/eventsources/sources" 39 metrics "github.com/argoproj/argo-events/metrics" 40 apicommon "github.com/argoproj/argo-events/pkg/apis/common" 41 "github.com/argoproj/argo-events/pkg/apis/eventsource/v1alpha1" 42 ) 43 44 // EventListener implements Eventing for sftp event source 45 type EventListener struct { 46 EventSourceName string 47 EventName string 48 SFTPEventSource v1alpha1.SFTPEventSource 49 Metrics *metrics.Metrics 50 } 51 52 // GetEventSourceName returns name of event source 53 func (el *EventListener) GetEventSourceName() string { 54 return el.EventSourceName 55 } 56 57 // GetEventName returns name of event 58 func (el *EventListener) GetEventName() string { 59 return el.EventName 60 } 61 62 // GetEventSourceType return type of event server 63 func (el *EventListener) GetEventSourceType() apicommon.EventSourceType { 64 return apicommon.SFTPEvent 65 } 66 67 // StartListening starts listening events 68 func (el *EventListener) StartListening(ctx context.Context, dispatch func([]byte, ...eventsourcecommon.Option) error) error { 69 log := logging.FromContext(ctx). 70 With(logging.LabelEventSourceType, el.GetEventSourceType(), logging.LabelEventName, el.GetEventName()) 71 defer sources.Recover(el.GetEventName()) 72 73 username, err := common.GetSecretFromVolume(el.SFTPEventSource.Username) 74 if err != nil { 75 return fmt.Errorf("username not found, %w", err) 76 } 77 address, err := common.GetSecretFromVolume(el.SFTPEventSource.Address) 78 if err != nil { 79 return fmt.Errorf("address not found, %w", err) 80 } 81 82 var authMethod ssh.AuthMethod 83 var hostKeyCallback ssh.HostKeyCallback 84 85 if el.SFTPEventSource.SSHKeySecret != nil { 86 sshKeyPath, err := common.GetSecretVolumePath(el.SFTPEventSource.SSHKeySecret) 87 if err != nil { 88 return fmt.Errorf("failed to get SSH key from mounted volume, %w", err) 89 } 90 sshKey, err := os.ReadFile(sshKeyPath) 91 if err != nil { 92 return fmt.Errorf("failed to read ssh key file. err: %+v", err) 93 } 94 signer, err := ssh.ParsePrivateKey(sshKey) 95 if err != nil { 96 return fmt.Errorf("failed to parse private ssh key. err: %+v", err) 97 } 98 publicKey, err := ssh.ParsePublicKey(sshKey) 99 if err != nil { 100 return fmt.Errorf("failed to parse public ssh key. err: %+v", err) 101 } 102 authMethod = ssh.PublicKeys(signer) 103 hostKeyCallback = ssh.FixedHostKey(publicKey) 104 } else { 105 password, err := common.GetSecretFromVolume(el.SFTPEventSource.Password) 106 if err != nil { 107 return fmt.Errorf("password not found, %w", err) 108 } 109 authMethod = ssh.Password(password) 110 hostKeyCallback = ssh.InsecureIgnoreHostKey() 111 } 112 113 sftpConfig := &ssh.ClientConfig{ 114 User: username, 115 Auth: []ssh.AuthMethod{authMethod}, 116 HostKeyCallback: hostKeyCallback, 117 } 118 119 var sshClient *ssh.Client 120 err = common.DoWithRetry(nil, func() error { 121 var err error 122 sshClient, err = ssh.Dial("tcp", address, sftpConfig) 123 return err 124 }) 125 if err != nil { 126 return fmt.Errorf("dialing sftp address %s: %w", address, err) 127 } 128 129 sftpClient, err := sftp.NewClient(sshClient) 130 if err != nil { 131 return fmt.Errorf("new sftp client: %w", err) 132 } 133 defer sftpClient.Close() 134 135 if err := el.listenEvents(ctx, sftpClient, dispatch, log); err != nil { 136 log.Error("failed to listen to events", zap.Error(err)) 137 return err 138 } 139 return nil 140 } 141 142 // listenEvents listen to sftp related events. 143 func (el *EventListener) listenEvents(ctx context.Context, sftpClient *sftp.Client, dispatch func([]byte, ...eventsourcecommon.Option) error, log *zap.SugaredLogger) error { 144 sftpEventSource := &el.SFTPEventSource 145 146 log.Info("identifying new files in sftp...") 147 startingFiles, err := sftpNonDirFiles(sftpClient, sftpEventSource.WatchPathConfig.Directory) 148 if err != nil { 149 return fmt.Errorf("failed to read directory %s for %s, %w", sftpEventSource.WatchPathConfig.Directory, el.GetEventName(), err) 150 } 151 152 // TODO: do we need some sort of stateful mechanism to capture changes between event source restarts? 153 // This would allow loading startingFiles from store/cache rather than initializing starting files from remote sftp source 154 155 var pathRegexp *regexp.Regexp 156 if sftpEventSource.WatchPathConfig.PathRegexp != "" { 157 log.Infow("matching file path with configured regex...", zap.Any("regex", sftpEventSource.WatchPathConfig.PathRegexp)) 158 pathRegexp, err = regexp.Compile(sftpEventSource.WatchPathConfig.PathRegexp) 159 if err != nil { 160 return fmt.Errorf("failed to match file path with configured regex %s for %s, %w", sftpEventSource.WatchPathConfig.PathRegexp, el.GetEventName(), err) 161 } 162 } 163 164 processOne := func(event fsnotify.Event) error { 165 defer func(start time.Time) { 166 el.Metrics.EventProcessingDuration(el.GetEventSourceName(), el.GetEventName(), float64(time.Since(start)/time.Millisecond)) 167 }(time.Now()) 168 169 log.Infow("sftp event", zap.Any("event-type", event.Op.String()), zap.Any("descriptor-name", event.Name)) 170 171 fileEvent := fsevent.Event{Name: event.Name, Op: fsevent.NewOp(event.Op.String()), Metadata: el.SFTPEventSource.Metadata} 172 payload, err := json.Marshal(fileEvent) 173 if err != nil { 174 return fmt.Errorf("failed to marshal the event to the fs event, %w", err) 175 } 176 log.Infow("dispatching sftp event on data channel...", zap.Any("event-type", event.Op.String()), zap.Any("descriptor-name", event.Name)) 177 if err = dispatch(payload); err != nil { 178 return fmt.Errorf("failed to dispatch an sftp event, %w", err) 179 } 180 return nil 181 } 182 183 maybeProcess := func(fi fs.FileInfo, op fsnotify.Op) error { 184 matched := false 185 relPath := strings.TrimPrefix(fi.Name(), sftpEventSource.WatchPathConfig.Directory) 186 if sftpEventSource.WatchPathConfig.Path != "" && sftpEventSource.WatchPathConfig.Path == relPath { 187 matched = true 188 } else if pathRegexp != nil && pathRegexp.MatchString(relPath) { 189 matched = true 190 } 191 if matched && sftpEventSource.EventType == op.String() { 192 if err = processOne(fsnotify.Event{ 193 Name: fi.Name(), 194 Op: op, 195 }); err != nil { 196 log.Errorw("failed to process a sftp event", zap.Error(err)) 197 el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName()) 198 } 199 } 200 201 return nil 202 } 203 204 pollIntervalDuration := time.Second * 10 205 if d, err := time.ParseDuration(sftpEventSource.PollIntervalDuration); err != nil { 206 pollIntervalDuration = d 207 } else { 208 log.Errorw("failed parsing poll interval duration.. falling back to %s: %w", pollIntervalDuration.String(), err) 209 } 210 211 log.Info("listening to sftp notifications... polling interval %s", pollIntervalDuration.String()) 212 for { 213 select { 214 case <-time.After(pollIntervalDuration): 215 216 files, err := sftpNonDirFiles(sftpClient, sftpEventSource.WatchPathConfig.Directory) 217 if err != nil { 218 return fmt.Errorf("failed to read directory %s for %s, %w", sftpEventSource.WatchPathConfig.Directory, el.GetEventName(), err) 219 } 220 221 fileDiff := diffFiles(startingFiles, files) 222 if fileDiff.isEmpty() { 223 continue 224 } 225 226 log.Infof("found %d new files and %d removed files", len(fileDiff.new), len(fileDiff.removed)) 227 228 for _, fi := range fileDiff.removed { 229 if err = maybeProcess(fi, fsnotify.Remove); err != nil { 230 log.Errorw("failed to process a file event", zap.Error(err)) 231 el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName()) 232 } 233 } 234 for _, fi := range fileDiff.new { 235 if err = maybeProcess(fi, fsnotify.Create); err != nil { 236 log.Errorw("failed to process a file event", zap.Error(err)) 237 el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName()) 238 } 239 } 240 241 // TODO: errors processing files will result in dropped events 242 // adjusting the logic for overwriting startingFiles could enable the next tick 243 // to capture the event 244 startingFiles = files 245 246 case <-ctx.Done(): 247 log.Info("event source has been stopped") 248 return nil 249 } 250 } 251 } 252 253 func sftpNonDirFiles(sftpClient *sftp.Client, dir string) ([]fs.FileInfo, error) { 254 var files []fs.FileInfo 255 err := common.DoWithRetry(nil, func() error { 256 var err error 257 files, err = sftpClient.ReadDir(dir) 258 return err 259 }) 260 if err != nil { 261 return nil, err 262 } 263 var nonDirFiles []fs.FileInfo 264 for _, f := range files { 265 if !f.IsDir() { 266 nonDirFiles = append(nonDirFiles, f) 267 } 268 } 269 270 files = nonDirFiles 271 return files, nil 272 } 273 274 type fileDiff struct { 275 new []fs.FileInfo 276 removed []fs.FileInfo 277 } 278 279 func (f fileDiff) isEmpty() bool { 280 return (len(f.new) + len(f.removed)) == 0 281 } 282 283 func diffFiles(startingFiles, currentFiles []fs.FileInfo) fileDiff { 284 fileMap := make(map[string]fs.FileInfo) 285 for _, file := range currentFiles { 286 fileMap[file.Name()] = file 287 } 288 289 var diff fileDiff 290 291 for _, startingFile := range startingFiles { 292 name := startingFile.Name() 293 294 if newFile, ok := fileMap[name]; !ok { 295 diff.removed = append(diff.removed, startingFile) 296 } else { 297 delete(fileMap, name) 298 299 if newFile.Size() != startingFile.Size() || newFile.ModTime() != startingFile.ModTime() { 300 diff.new = append(diff.new, newFile) 301 } 302 } 303 } 304 305 for _, newFile := range fileMap { 306 diff.new = append(diff.new, newFile) 307 } 308 309 return diff 310 }