github.com/Jeffail/benthos/v3@v3.65.0/lib/input/sftp.go (about) 1 package input 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "sync" 9 "time" 10 11 "github.com/Jeffail/benthos/v3/internal/codec" 12 "github.com/Jeffail/benthos/v3/internal/docs" 13 sftpSetup "github.com/Jeffail/benthos/v3/internal/impl/sftp" 14 "github.com/Jeffail/benthos/v3/internal/interop" 15 "github.com/Jeffail/benthos/v3/lib/input/reader" 16 "github.com/Jeffail/benthos/v3/lib/log" 17 "github.com/Jeffail/benthos/v3/lib/message" 18 "github.com/Jeffail/benthos/v3/lib/metrics" 19 "github.com/Jeffail/benthos/v3/lib/types" 20 "github.com/pkg/sftp" 21 ) 22 23 func init() { 24 watcherDocs := docs.FieldSpecs{ 25 docs.FieldCommon( 26 "enabled", 27 "Whether file watching is enabled.", 28 ), 29 docs.FieldCommon( 30 "minimum_age", 31 "The minimum period of time since a file was last updated before attempting to consume it. Increasing this period decreases the likelihood that a file will be consumed whilst it is still being written to.", 32 "10s", "1m", "10m", 33 ), 34 docs.FieldCommon( 35 "poll_interval", 36 "The interval between each attempt to scan the target paths for new files.", 37 "100ms", "1s", 38 ), 39 docs.FieldCommon( 40 "cache", 41 "A [cache resource](/docs/components/caches/about) for storing the paths of files already consumed.", 42 ), 43 } 44 45 Constructors[TypeSFTP] = TypeSpec{ 46 constructor: fromSimpleConstructor(func(conf Config, mgr types.Manager, log log.Modular, stats metrics.Type) (Type, error) { 47 r, err := newSFTPReader(conf.SFTP, mgr, log, stats) 48 if err != nil { 49 return nil, err 50 } 51 return NewAsyncReader( 52 TypeSFTP, 53 true, 54 reader.NewAsyncPreserver(r), 55 log, stats, 56 ) 57 }), 58 Status: docs.StatusExperimental, 59 Version: "3.39.0", 60 Summary: `Consumes files from a server over SFTP.`, 61 Description: ` 62 ## Metadata 63 64 This input adds the following metadata fields to each message: 65 66 ` + "```" + ` 67 - sftp_path 68 ` + "```" + ` 69 70 You can access these metadata fields using [function interpolation](/docs/configuration/interpolation#metadata).`, 71 FieldSpecs: docs.FieldSpecs{ 72 docs.FieldCommon( 73 "address", 74 "The address of the server to connect to that has the target files.", 75 ), 76 docs.FieldCommon( 77 "credentials", 78 "The credentials to use to log into the server.", 79 ).WithChildren(sftpSetup.CredentialsDocs()...), 80 docs.FieldString( 81 "paths", 82 "A list of paths to consume sequentially. Glob patterns are supported.", 83 ).Array(), 84 codec.ReaderDocs, 85 docs.FieldAdvanced("delete_on_finish", "Whether to delete files from the server once they are processed."), 86 docs.FieldAdvanced("max_buffer", "The largest token size expected when consuming delimited files."), 87 docs.FieldCommon( 88 "watcher", 89 "An experimental mode whereby the input will periodically scan the target paths for new files and consume them, when all files are consumed the input will continue polling for new files.", 90 ).WithChildren(watcherDocs...).AtVersion("3.42.0"), 91 }, 92 Categories: []Category{ 93 CategoryNetwork, 94 }, 95 } 96 } 97 98 //------------------------------------------------------------------------------ 99 100 type watcherConfig struct { 101 Enabled bool `json:"enabled" yaml:"enabled"` 102 MinimumAge string `json:"minimum_age" yaml:"minimum_age"` 103 PollInterval string `json:"poll_interval" yaml:"poll_interval"` 104 Cache string `json:"cache" yaml:"cache"` 105 } 106 107 // SFTPConfig contains configuration fields for the SFTP input type. 108 type SFTPConfig struct { 109 Address string `json:"address" yaml:"address"` 110 Credentials sftpSetup.Credentials `json:"credentials" yaml:"credentials"` 111 Paths []string `json:"paths" yaml:"paths"` 112 Codec string `json:"codec" yaml:"codec"` 113 DeleteOnFinish bool `json:"delete_on_finish" yaml:"delete_on_finish"` 114 MaxBuffer int `json:"max_buffer" yaml:"max_buffer"` 115 Watcher watcherConfig `json:"watcher" yaml:"watcher"` 116 } 117 118 // NewSFTPConfig creates a new SFTPConfig with default values. 119 func NewSFTPConfig() SFTPConfig { 120 return SFTPConfig{ 121 Address: "", 122 Credentials: sftpSetup.Credentials{}, 123 Paths: []string{}, 124 Codec: "all-bytes", 125 DeleteOnFinish: false, 126 MaxBuffer: 1000000, 127 Watcher: watcherConfig{ 128 Enabled: false, 129 MinimumAge: "1s", 130 PollInterval: "1s", 131 Cache: "", 132 }, 133 } 134 } 135 136 //------------------------------------------------------------------------------ 137 138 type sftpReader struct { 139 conf SFTPConfig 140 141 log log.Modular 142 stats metrics.Type 143 mgr types.Manager 144 145 client *sftp.Client 146 147 paths []string 148 scannerCtor codec.ReaderConstructor 149 150 scannerMut sync.Mutex 151 scanner codec.Reader 152 currentPath string 153 154 watcherPollInterval time.Duration 155 watcherMinAge time.Duration 156 } 157 158 func newSFTPReader(conf SFTPConfig, mgr types.Manager, log log.Modular, stats metrics.Type) (*sftpReader, error) { 159 codecConf := codec.NewReaderConfig() 160 codecConf.MaxScanTokenSize = conf.MaxBuffer 161 ctor, err := codec.GetReader(conf.Codec, codecConf) 162 if err != nil { 163 return nil, err 164 } 165 166 var watcherPollInterval, watcherMinAge time.Duration 167 if conf.Watcher.Enabled { 168 if watcherPollInterval, err = time.ParseDuration(conf.Watcher.PollInterval); err != nil { 169 return nil, fmt.Errorf("failed to parse watcher poll interval: %w", err) 170 } 171 172 if watcherMinAge, err = time.ParseDuration(conf.Watcher.MinimumAge); err != nil { 173 return nil, fmt.Errorf("failed to parse watcher minimum age: %w", err) 174 } 175 176 if conf.Watcher.Cache == "" { 177 return nil, errors.New("a cache must be specified when watcher mode is enabled") 178 } 179 180 if err := interop.ProbeCache(context.Background(), mgr, conf.Watcher.Cache); err != nil { 181 return nil, err 182 } 183 } 184 185 s := &sftpReader{ 186 conf: conf, 187 log: log, 188 stats: stats, 189 mgr: mgr, 190 scannerCtor: ctor, 191 watcherPollInterval: watcherPollInterval, 192 watcherMinAge: watcherMinAge, 193 } 194 195 return s, err 196 } 197 198 // ConnectWithContext attempts to establish a connection to the target SFTP server. 199 func (s *sftpReader) ConnectWithContext(ctx context.Context) error { 200 var err error 201 202 s.scannerMut.Lock() 203 defer s.scannerMut.Unlock() 204 205 if s.scanner != nil { 206 return nil 207 } 208 209 if s.client == nil { 210 if s.client, err = s.conf.Credentials.GetClient(s.conf.Address); err != nil { 211 return err 212 } 213 s.log.Debugln("Finding more paths") 214 s.paths, err = s.getFilePaths() 215 if err != nil { 216 return err 217 } 218 } 219 220 if len(s.paths) == 0 { 221 if !s.conf.Watcher.Enabled { 222 s.client.Close() 223 s.client = nil 224 s.log.Debugln("Paths exhausted, closing input") 225 return types.ErrTypeClosed 226 } 227 select { 228 case <-time.After(s.watcherPollInterval): 229 case <-ctx.Done(): 230 return ctx.Err() 231 } 232 s.paths, err = s.getFilePaths() 233 return err 234 } 235 236 nextPath := s.paths[0] 237 238 file, err := s.client.Open(nextPath) 239 if err != nil { 240 return err 241 } 242 243 if s.scanner, err = s.scannerCtor(nextPath, file, func(ctx context.Context, err error) error { 244 if err == nil && s.conf.DeleteOnFinish { 245 return s.client.Remove(nextPath) 246 } 247 return nil 248 }); err != nil { 249 file.Close() 250 return err 251 } 252 253 s.currentPath = nextPath 254 s.paths = s.paths[1:] 255 256 s.log.Infof("Consuming from file '%v'\n", nextPath) 257 return err 258 } 259 260 // ReadWithContext attempts to read a new message from the target file(s) on the server. 261 func (s *sftpReader) ReadWithContext(ctx context.Context) (types.Message, reader.AsyncAckFn, error) { 262 s.scannerMut.Lock() 263 defer s.scannerMut.Unlock() 264 265 if s.scanner == nil || s.client == nil { 266 return nil, nil, types.ErrNotConnected 267 } 268 269 parts, codecAckFn, err := s.scanner.Next(ctx) 270 if err != nil { 271 if errors.Is(err, context.Canceled) || 272 errors.Is(err, context.DeadlineExceeded) { 273 err = types.ErrTimeout 274 } 275 if err != types.ErrTimeout { 276 if s.conf.Watcher.Enabled { 277 var setErr error 278 if cerr := interop.AccessCache(ctx, s.mgr, s.conf.Watcher.Cache, func(cache types.Cache) { 279 setErr = cache.Set(s.currentPath, []byte("@")) 280 }); cerr != nil { 281 return nil, nil, fmt.Errorf("failed to get the cache for sftp watcher mode: %v", cerr) 282 } 283 if setErr != nil { 284 return nil, nil, fmt.Errorf("failed to update path in cache %s: %v", s.currentPath, err) 285 } 286 } 287 s.scanner.Close(ctx) 288 s.scanner = nil 289 } 290 if errors.Is(err, io.EOF) { 291 err = types.ErrTimeout 292 } 293 return nil, nil, err 294 } 295 296 for _, part := range parts { 297 part.Metadata().Set("sftp_path", s.currentPath) 298 } 299 msg := message.New(nil) 300 msg.Append(parts...) 301 302 return msg, func(ctx context.Context, res types.Response) error { 303 return codecAckFn(ctx, res.Error()) 304 }, nil 305 } 306 307 // CloseAsync begins cleaning up resources used by this reader asynchronously. 308 func (s *sftpReader) CloseAsync() { 309 go func() { 310 s.scannerMut.Lock() 311 if s.scanner != nil { 312 s.scanner.Close(context.Background()) 313 s.scanner = nil 314 s.paths = nil 315 } 316 if s.client != nil { 317 s.client.Close() 318 s.client = nil 319 } 320 s.scannerMut.Unlock() 321 }() 322 } 323 324 // WaitForClose will block until either the reader is closed or a specified 325 // timeout occurs. 326 func (s *sftpReader) WaitForClose(timeout time.Duration) error { 327 return nil 328 } 329 330 func (s *sftpReader) getFilePaths() ([]string, error) { 331 var filepaths []string 332 if !s.conf.Watcher.Enabled { 333 for _, p := range s.conf.Paths { 334 paths, err := s.client.Glob(p) 335 if err != nil { 336 s.log.Warnf("Failed to scan files from path %v: %v\n", p, err) 337 continue 338 } 339 filepaths = append(filepaths, paths...) 340 } 341 return filepaths, nil 342 } 343 344 if cerr := interop.AccessCache(context.Background(), s.mgr, s.conf.Watcher.Cache, func(cache types.Cache) { 345 for _, p := range s.conf.Paths { 346 paths, err := s.client.Glob(p) 347 if err != nil { 348 s.log.Warnf("Failed to scan files from path %v: %v\n", p, err) 349 continue 350 } 351 352 for _, path := range paths { 353 info, err := s.client.Stat(path) 354 if err != nil { 355 s.log.Warnf("Failed to stat path %v: %v\n", path, err) 356 continue 357 } 358 if time.Since(info.ModTime()) < s.watcherMinAge { 359 continue 360 } 361 if _, err := cache.Get(path); err != nil { 362 filepaths = append(filepaths, path) 363 } else if err = cache.Set(path, []byte("@")); err != nil { // Reset the TTL for the path 364 s.log.Warnf("Failed to set key in cache for path %v: %v\n", path, err) 365 } 366 } 367 } 368 }); cerr != nil { 369 return nil, fmt.Errorf("error getting cache in getFilePaths: %v", cerr) 370 } 371 return filepaths, nil 372 }