github.com/argoproj/argo-events@v1.9.1/eventsources/sources/sftp/start.go (about)

     1  /*
     2  Copyright 2018 BlackRock, Inc.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8  	http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sftp
    18  
    19  import (
    20  	"context"
    21  	"encoding/json"
    22  	"fmt"
    23  	"io/fs"
    24  	"os"
    25  	"regexp"
    26  	"strings"
    27  	"time"
    28  
    29  	"github.com/fsnotify/fsnotify"
    30  	"github.com/pkg/sftp"
    31  	"go.uber.org/zap"
    32  	"golang.org/x/crypto/ssh"
    33  
    34  	"github.com/argoproj/argo-events/common"
    35  	"github.com/argoproj/argo-events/common/logging"
    36  	eventsourcecommon "github.com/argoproj/argo-events/eventsources/common"
    37  	"github.com/argoproj/argo-events/eventsources/common/fsevent"
    38  	"github.com/argoproj/argo-events/eventsources/sources"
    39  	metrics "github.com/argoproj/argo-events/metrics"
    40  	apicommon "github.com/argoproj/argo-events/pkg/apis/common"
    41  	"github.com/argoproj/argo-events/pkg/apis/eventsource/v1alpha1"
    42  )
    43  
    44  // EventListener implements Eventing for sftp event source
    45  type EventListener struct {
    46  	EventSourceName string
    47  	EventName       string
    48  	SFTPEventSource v1alpha1.SFTPEventSource
    49  	Metrics         *metrics.Metrics
    50  }
    51  
    52  // GetEventSourceName returns name of event source
    53  func (el *EventListener) GetEventSourceName() string {
    54  	return el.EventSourceName
    55  }
    56  
    57  // GetEventName returns name of event
    58  func (el *EventListener) GetEventName() string {
    59  	return el.EventName
    60  }
    61  
    62  // GetEventSourceType return type of event server
    63  func (el *EventListener) GetEventSourceType() apicommon.EventSourceType {
    64  	return apicommon.SFTPEvent
    65  }
    66  
    67  // StartListening starts listening events
    68  func (el *EventListener) StartListening(ctx context.Context, dispatch func([]byte, ...eventsourcecommon.Option) error) error {
    69  	log := logging.FromContext(ctx).
    70  		With(logging.LabelEventSourceType, el.GetEventSourceType(), logging.LabelEventName, el.GetEventName())
    71  	defer sources.Recover(el.GetEventName())
    72  
    73  	username, err := common.GetSecretFromVolume(el.SFTPEventSource.Username)
    74  	if err != nil {
    75  		return fmt.Errorf("username not found, %w", err)
    76  	}
    77  	address, err := common.GetSecretFromVolume(el.SFTPEventSource.Address)
    78  	if err != nil {
    79  		return fmt.Errorf("address not found, %w", err)
    80  	}
    81  
    82  	var authMethod ssh.AuthMethod
    83  	var hostKeyCallback ssh.HostKeyCallback
    84  
    85  	if el.SFTPEventSource.SSHKeySecret != nil {
    86  		sshKeyPath, err := common.GetSecretVolumePath(el.SFTPEventSource.SSHKeySecret)
    87  		if err != nil {
    88  			return fmt.Errorf("failed to get SSH key from mounted volume, %w", err)
    89  		}
    90  		sshKey, err := os.ReadFile(sshKeyPath)
    91  		if err != nil {
    92  			return fmt.Errorf("failed to read ssh key file. err: %+v", err)
    93  		}
    94  		signer, err := ssh.ParsePrivateKey(sshKey)
    95  		if err != nil {
    96  			return fmt.Errorf("failed to parse private ssh key. err: %+v", err)
    97  		}
    98  		publicKey, err := ssh.ParsePublicKey(sshKey)
    99  		if err != nil {
   100  			return fmt.Errorf("failed to parse public ssh key. err: %+v", err)
   101  		}
   102  		authMethod = ssh.PublicKeys(signer)
   103  		hostKeyCallback = ssh.FixedHostKey(publicKey)
   104  	} else {
   105  		password, err := common.GetSecretFromVolume(el.SFTPEventSource.Password)
   106  		if err != nil {
   107  			return fmt.Errorf("password not found, %w", err)
   108  		}
   109  		authMethod = ssh.Password(password)
   110  		hostKeyCallback = ssh.InsecureIgnoreHostKey()
   111  	}
   112  
   113  	sftpConfig := &ssh.ClientConfig{
   114  		User:            username,
   115  		Auth:            []ssh.AuthMethod{authMethod},
   116  		HostKeyCallback: hostKeyCallback,
   117  	}
   118  
   119  	var sshClient *ssh.Client
   120  	err = common.DoWithRetry(nil, func() error {
   121  		var err error
   122  		sshClient, err = ssh.Dial("tcp", address, sftpConfig)
   123  		return err
   124  	})
   125  	if err != nil {
   126  		return fmt.Errorf("dialing sftp address %s: %w", address, err)
   127  	}
   128  
   129  	sftpClient, err := sftp.NewClient(sshClient)
   130  	if err != nil {
   131  		return fmt.Errorf("new sftp client: %w", err)
   132  	}
   133  	defer sftpClient.Close()
   134  
   135  	if err := el.listenEvents(ctx, sftpClient, dispatch, log); err != nil {
   136  		log.Error("failed to listen to events", zap.Error(err))
   137  		return err
   138  	}
   139  	return nil
   140  }
   141  
   142  // listenEvents listen to sftp related events.
   143  func (el *EventListener) listenEvents(ctx context.Context, sftpClient *sftp.Client, dispatch func([]byte, ...eventsourcecommon.Option) error, log *zap.SugaredLogger) error {
   144  	sftpEventSource := &el.SFTPEventSource
   145  
   146  	log.Info("identifying new files in sftp...")
   147  	startingFiles, err := sftpNonDirFiles(sftpClient, sftpEventSource.WatchPathConfig.Directory)
   148  	if err != nil {
   149  		return fmt.Errorf("failed to read directory %s for %s, %w", sftpEventSource.WatchPathConfig.Directory, el.GetEventName(), err)
   150  	}
   151  
   152  	// TODO: do we need some sort of stateful mechanism to capture changes between event source restarts?
   153  	// This would allow loading startingFiles from store/cache rather than initializing starting files from  remote sftp source
   154  
   155  	var pathRegexp *regexp.Regexp
   156  	if sftpEventSource.WatchPathConfig.PathRegexp != "" {
   157  		log.Infow("matching file path with configured regex...", zap.Any("regex", sftpEventSource.WatchPathConfig.PathRegexp))
   158  		pathRegexp, err = regexp.Compile(sftpEventSource.WatchPathConfig.PathRegexp)
   159  		if err != nil {
   160  			return fmt.Errorf("failed to match file path with configured regex %s for %s, %w", sftpEventSource.WatchPathConfig.PathRegexp, el.GetEventName(), err)
   161  		}
   162  	}
   163  
   164  	processOne := func(event fsnotify.Event) error {
   165  		defer func(start time.Time) {
   166  			el.Metrics.EventProcessingDuration(el.GetEventSourceName(), el.GetEventName(), float64(time.Since(start)/time.Millisecond))
   167  		}(time.Now())
   168  
   169  		log.Infow("sftp event", zap.Any("event-type", event.Op.String()), zap.Any("descriptor-name", event.Name))
   170  
   171  		fileEvent := fsevent.Event{Name: event.Name, Op: fsevent.NewOp(event.Op.String()), Metadata: el.SFTPEventSource.Metadata}
   172  		payload, err := json.Marshal(fileEvent)
   173  		if err != nil {
   174  			return fmt.Errorf("failed to marshal the event to the fs event, %w", err)
   175  		}
   176  		log.Infow("dispatching sftp event on data channel...", zap.Any("event-type", event.Op.String()), zap.Any("descriptor-name", event.Name))
   177  		if err = dispatch(payload); err != nil {
   178  			return fmt.Errorf("failed to dispatch an sftp event, %w", err)
   179  		}
   180  		return nil
   181  	}
   182  
   183  	maybeProcess := func(fi fs.FileInfo, op fsnotify.Op) error {
   184  		matched := false
   185  		relPath := strings.TrimPrefix(fi.Name(), sftpEventSource.WatchPathConfig.Directory)
   186  		if sftpEventSource.WatchPathConfig.Path != "" && sftpEventSource.WatchPathConfig.Path == relPath {
   187  			matched = true
   188  		} else if pathRegexp != nil && pathRegexp.MatchString(relPath) {
   189  			matched = true
   190  		}
   191  		if matched && sftpEventSource.EventType == op.String() {
   192  			if err = processOne(fsnotify.Event{
   193  				Name: fi.Name(),
   194  				Op:   op,
   195  			}); err != nil {
   196  				log.Errorw("failed to process a sftp event", zap.Error(err))
   197  				el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName())
   198  			}
   199  		}
   200  
   201  		return nil
   202  	}
   203  
   204  	pollIntervalDuration := time.Second * 10
   205  	if d, err := time.ParseDuration(sftpEventSource.PollIntervalDuration); err != nil {
   206  		pollIntervalDuration = d
   207  	} else {
   208  		log.Errorw("failed parsing poll interval duration.. falling back to %s: %w", pollIntervalDuration.String(), err)
   209  	}
   210  
   211  	log.Info("listening to sftp notifications... polling interval %s", pollIntervalDuration.String())
   212  	for {
   213  		select {
   214  		case <-time.After(pollIntervalDuration):
   215  
   216  			files, err := sftpNonDirFiles(sftpClient, sftpEventSource.WatchPathConfig.Directory)
   217  			if err != nil {
   218  				return fmt.Errorf("failed to read directory %s for %s, %w", sftpEventSource.WatchPathConfig.Directory, el.GetEventName(), err)
   219  			}
   220  
   221  			fileDiff := diffFiles(startingFiles, files)
   222  			if fileDiff.isEmpty() {
   223  				continue
   224  			}
   225  
   226  			log.Infof("found %d new files and %d removed files", len(fileDiff.new), len(fileDiff.removed))
   227  
   228  			for _, fi := range fileDiff.removed {
   229  				if err = maybeProcess(fi, fsnotify.Remove); err != nil {
   230  					log.Errorw("failed to process a file event", zap.Error(err))
   231  					el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName())
   232  				}
   233  			}
   234  			for _, fi := range fileDiff.new {
   235  				if err = maybeProcess(fi, fsnotify.Create); err != nil {
   236  					log.Errorw("failed to process a file event", zap.Error(err))
   237  					el.Metrics.EventProcessingFailed(el.GetEventSourceName(), el.GetEventName())
   238  				}
   239  			}
   240  
   241  			// TODO: errors processing files will result in dropped events
   242  			// adjusting the logic for overwriting startingFiles could enable the next tick
   243  			// to capture the event
   244  			startingFiles = files
   245  
   246  		case <-ctx.Done():
   247  			log.Info("event source has been stopped")
   248  			return nil
   249  		}
   250  	}
   251  }
   252  
   253  func sftpNonDirFiles(sftpClient *sftp.Client, dir string) ([]fs.FileInfo, error) {
   254  	var files []fs.FileInfo
   255  	err := common.DoWithRetry(nil, func() error {
   256  		var err error
   257  		files, err = sftpClient.ReadDir(dir)
   258  		return err
   259  	})
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  	var nonDirFiles []fs.FileInfo
   264  	for _, f := range files {
   265  		if !f.IsDir() {
   266  			nonDirFiles = append(nonDirFiles, f)
   267  		}
   268  	}
   269  
   270  	files = nonDirFiles
   271  	return files, nil
   272  }
   273  
   274  type fileDiff struct {
   275  	new     []fs.FileInfo
   276  	removed []fs.FileInfo
   277  }
   278  
   279  func (f fileDiff) isEmpty() bool {
   280  	return (len(f.new) + len(f.removed)) == 0
   281  }
   282  
   283  func diffFiles(startingFiles, currentFiles []fs.FileInfo) fileDiff {
   284  	fileMap := make(map[string]fs.FileInfo)
   285  	for _, file := range currentFiles {
   286  		fileMap[file.Name()] = file
   287  	}
   288  
   289  	var diff fileDiff
   290  
   291  	for _, startingFile := range startingFiles {
   292  		name := startingFile.Name()
   293  
   294  		if newFile, ok := fileMap[name]; !ok {
   295  			diff.removed = append(diff.removed, startingFile)
   296  		} else {
   297  			delete(fileMap, name)
   298  
   299  			if newFile.Size() != startingFile.Size() || newFile.ModTime() != startingFile.ModTime() {
   300  				diff.new = append(diff.new, newFile)
   301  			}
   302  		}
   303  	}
   304  
   305  	for _, newFile := range fileMap {
   306  		diff.new = append(diff.new, newFile)
   307  	}
   308  
   309  	return diff
   310  }