github.com/kubeshop/testkube@v1.17.23/pkg/executor/scraper/filesystem_extractor.go

package scraper

import (
	"bytes"
	"context"
	"encoding/json"
	"io"
	"os"
	"path/filepath"
	"regexp"
	"strings"

	"github.com/kubeshop/testkube/pkg/archive"
	"github.com/kubeshop/testkube/pkg/filesystem"
	"github.com/kubeshop/testkube/pkg/log"
	"github.com/pkg/errors"
)

const (
	// defaultTarballName is the object name of the single gzipped tarball produced by ArchiveFilesystemExtractor.
	defaultTarballName = "artifacts.tar.gz"
	// defaultTarballMetaName is the object name of the optional JSON metadata describing the tarball contents.
	defaultTarballMetaName = ".testkube-meta-files.json"
)

// ArchiveFilesystemExtractor walks the given directories, bundles every matching
// file into a single gzipped tarball and hands that tarball to the ProcessFn.
type ArchiveFilesystemExtractor struct {
	generateMeta bool
	fs           filesystem.FileSystem
}

// NewArchiveFilesystemExtractor creates an ArchiveFilesystemExtractor backed by the
// given filesystem and applies any provided options.
func NewArchiveFilesystemExtractor(fs filesystem.FileSystem, opts ...ArchiveFilesystemExtractorOpts) *ArchiveFilesystemExtractor {
	r := &ArchiveFilesystemExtractor{fs: fs}

	for _, opt := range opts {
		opt(r)
	}

	return r
}

// ArchiveFilesystemExtractorOpts configures an ArchiveFilesystemExtractor.
type ArchiveFilesystemExtractorOpts func(*ArchiveFilesystemExtractor)

// GenerateTarballMetaFile makes the extractor emit an additional JSON object
// describing the files packed into the tarball.
func GenerateTarballMetaFile() ArchiveFilesystemExtractorOpts {
	return func(a *ArchiveFilesystemExtractor) {
		a.generateMeta = true
	}
}
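
// Illustrative call site (a sketch, not code from this file): the option is passed
// to the constructor, after which Extract also emits the metadata object alongside
// the tarball.
//
//	extractor := NewArchiveFilesystemExtractor(fs, GenerateTarballMetaFile())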

// Extract implements Extractor. It walks every directory in paths, collects the
// files whose path matches at least one of the comma-separated regular expressions
// in masks (all files when masks is empty), packs them into a single gzipped tarball
// and passes the tarball to process. notify is called once for every matched file.
func (e *ArchiveFilesystemExtractor) Extract(ctx context.Context, paths, masks []string, process ProcessFn, notify NotifyFn) error {
	var archiveFiles []*archive.File
	for _, dir := range paths {
		log.DefaultLogger.Infof("scraping artifacts in directory: %v", dir)

		if _, err := e.fs.Stat(dir); os.IsNotExist(err) {
			log.DefaultLogger.Warnf("skipping directory %s because it does not exist", dir)
			continue
		}

		err := e.fs.Walk(
			dir,
			func(path string, fileInfo os.FileInfo, err error) error {
				log.DefaultLogger.Debugf("checking path %s", path)
				if err != nil {
					return errors.Wrap(err, "walk function returned a special error")
				}

				if fileInfo.IsDir() {
					log.DefaultLogger.Debugf("skipping directory %s", path)
					return nil
				}

				// Each mask may contain several comma-separated regular expressions.
				var regexps []*regexp.Regexp
				for _, mask := range masks {
					values := strings.Split(mask, ",")
					for _, value := range values {
						re, err := regexp.Compile(value)
						if err != nil {
							return errors.Wrap(err, "regexp compilation error")
						}

						regexps = append(regexps, re)
					}
				}

				// With no masks configured every file is scraped.
				found := len(regexps) == 0
				for i := range regexps {
					if found = regexps[i].MatchString(path); found {
						break
					}
				}

				if !found {
					return nil
				}

				if err := notify(ctx, path); err != nil {
					log.DefaultLogger.Warnf("error notifying for file %s", path)
				}

				archiveFile, err := e.newArchiveFile(dir, path)
				if err != nil {
					return errors.Wrapf(err, "error creating archive file for path %s", path)
				}
				archiveFiles = append(archiveFiles, archiveFile)

				return nil
			},
		)

		if err != nil {
			return errors.Wrapf(err, "error walking directory %s", dir)
		}
	}

	if len(archiveFiles) == 0 {
		log.DefaultLogger.Infof("skipping tarball creation because no files were scraped")
		return nil
	}

	tarballService := archive.NewTarballService()
	var artifactsTarball bytes.Buffer
	log.DefaultLogger.Infof("creating artifacts tarball with %d files", len(archiveFiles))
	if err := tarballService.Create(&artifactsTarball, archiveFiles); err != nil {
		return errors.Wrapf(err, "error creating tarball")
	}

	object := &Object{
		Name:     defaultTarballName,
		Size:     int64(artifactsTarball.Len()),
		Data:     &artifactsTarball,
		DataType: DataTypeTarball,
	}
	if err := process(ctx, object); err != nil {
		return errors.Wrapf(err, "error processing object %s", object.Name)
	}

	if e.generateMeta {
		tarballMeta, err := e.newTarballMeta(archiveFiles)
		if err != nil {
			return errors.Wrapf(err, "error creating tarball meta")
		}
		if err := process(ctx, tarballMeta); err != nil {
			return errors.Wrapf(err, "error processing object %s", tarballMeta.Name)
		}
	}

	return nil
}

// newTarballMeta builds a raw JSON object listing the name and size of every file
// packed into the tarball, so consumers can inspect its contents without unpacking it.
func (e *ArchiveFilesystemExtractor) newTarballMeta(files []*archive.File) (*Object, error) {
	var stats []*FileStat
	for _, f := range files {
		stats = append(stats, &FileStat{
			Name: f.Name,
			Size: f.Size,
		})
	}
	meta := &FilesMeta{
		Files:    stats,
		DataType: DataTypeTarball,
		Archive:  defaultTarballName,
	}
	jsonMeta, err := json.Marshal(meta)
	if err != nil {
		return nil, err
	}

	return &Object{
		Name:     defaultTarballMetaName,
		Size:     int64(len(jsonMeta)),
		Data:     bytes.NewReader(jsonMeta),
		DataType: DataTypeRaw,
	}, nil
}

// newArchiveFile reads the file at path into memory and wraps it in an archive.File
// whose name is the path relative to baseDir.
func (e *ArchiveFilesystemExtractor) newArchiveFile(baseDir string, path string) (*archive.File, error) {
	f, err := e.fs.OpenFileBuffered(path)
	if err != nil {
		return nil, errors.Wrapf(err, "error opening file %s", path)
	}

	stat, err := e.fs.Stat(path)
	if err != nil {
		return nil, errors.Wrapf(err, "error getting file stat %s", path)
	}

	relpath, err := filepath.Rel(baseDir, path)
	if err != nil {
		return nil, errors.Wrapf(err, "error getting relative path for %s", path)
	}
	if relpath == "." {
		relpath = stat.Name()
	}

	archiveFile := archive.File{
		Name:    relpath,
		Size:    stat.Size(),
		Mode:    int64(stat.Mode()),
		ModTime: stat.ModTime(),
		Data:    &bytes.Buffer{},
	}
	n, err := io.Copy(archiveFile.Data, f)
	if err != nil {
		return nil, errors.Wrapf(err, "error copying file %s data to tarball", path)
	}
	if n != stat.Size() {
		return nil, errors.Errorf("error copying file %s data to tarball, expected %d bytes, got %d", path, stat.Size(), n)
	}

	return &archiveFile, nil
}

var _ Extractor = (*ArchiveFilesystemExtractor)(nil)
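
// Usage sketch for ArchiveFilesystemExtractor (illustrative only; the filesystem
// wiring, directory path, and callback bodies below are assumptions, not code from
// this package, and ProcessFn/NotifyFn are assumed to be plain function types
// matching the calls made in Extract). It shows matched files being bundled into one
// tarball object plus, because GenerateTarballMetaFile is set, a second metadata object:
//
//	var fs filesystem.FileSystem // an OS-backed implementation wired by the caller
//	ctx := context.Background()
//	extractor := NewArchiveFilesystemExtractor(fs, GenerateTarballMetaFile())
//	err := extractor.Extract(ctx,
//		[]string{"/data/artifacts"},  // directories to scrape (hypothetical path)
//		[]string{`\.xml$,\.json$`},   // comma-separated regexps; empty means "all files"
//		func(ctx context.Context, object *Object) error {
//			// upload object.Data (artifacts.tar.gz or the meta JSON) somewhere
//			return nil
//		},
//		func(ctx context.Context, path string) error {
//			// emit a "file matched" event for path
//			return nil
//		},
//	)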

// RecursiveFilesystemExtractor walks the given directories and passes every matching
// file to the ProcessFn individually as a raw object, without archiving.
type RecursiveFilesystemExtractor struct {
	fs filesystem.FileSystem
}

// NewRecursiveFilesystemExtractor creates a RecursiveFilesystemExtractor backed by the given filesystem.
func NewRecursiveFilesystemExtractor(fs filesystem.FileSystem) *RecursiveFilesystemExtractor {
	return &RecursiveFilesystemExtractor{fs: fs}
}

// Extract implements Extractor. It walks every directory in paths and, for each file
// whose path matches at least one of the comma-separated regular expressions in masks
// (all files when masks is empty), calls notify and then passes the file to process
// as a raw object.
func (e *RecursiveFilesystemExtractor) Extract(ctx context.Context, paths, masks []string, process ProcessFn, notify NotifyFn) error {
	for _, dir := range paths {
		log.DefaultLogger.Infof("scraping artifacts in directory: %v", dir)

		if _, err := e.fs.Stat(dir); os.IsNotExist(err) {
			log.DefaultLogger.Warnf("skipping directory %s because it does not exist", dir)
			continue
		}

		err := e.fs.Walk(
			dir,
			func(path string, fileInfo os.FileInfo, err error) error {
				log.DefaultLogger.Debugf("checking path %s", path)
				if err != nil {
					return errors.Wrap(err, "walk function returned a special error")
				}

				if fileInfo.IsDir() {
					log.DefaultLogger.Infof("skipping directory %s", path)
					return nil
				}

				// Each mask may contain several comma-separated regular expressions.
				var regexps []*regexp.Regexp
				for _, mask := range masks {
					values := strings.Split(mask, ",")
					for _, value := range values {
						re, err := regexp.Compile(value)
						if err != nil {
							return errors.Wrap(err, "regexp compilation error")
						}

						regexps = append(regexps, re)
					}
				}

				// With no masks configured every file is scraped.
				found := len(regexps) == 0
				for i := range regexps {
					if found = regexps[i].MatchString(path); found {
						break
					}
				}

				if !found {
					return nil
				}

				if err := notify(ctx, path); err != nil {
					log.DefaultLogger.Warnf("error notifying for file %s", path)
				}

				reader, err := e.fs.OpenFileBuffered(path)
				if err != nil {
					return errors.Wrapf(err, "error opening buffered %s", path)
				}
				relpath, err := filepath.Rel(dir, path)
				if err != nil {
					return errors.Wrapf(err, "error getting relative path for %s", path)
				}
				if relpath == "." {
					relpath = fileInfo.Name()
				}
				object := &Object{
					Name:     relpath,
					Size:     fileInfo.Size(),
					Data:     reader,
					DataType: DataTypeRaw,
				}
				log.DefaultLogger.Infof("filesystem extractor is sending file to be processed: %v", object.Name)
				if err := process(ctx, object); err != nil {
					return errors.Wrapf(err, "failed to process file %s", object.Name)
				}

				return nil
			})
		if err != nil {
			return errors.Wrapf(err, "failed to walk directory %s", dir)
		}
	}

	return nil
}

var _ Extractor = (*RecursiveFilesystemExtractor)(nil)
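
// Usage sketch for RecursiveFilesystemExtractor (illustrative only; the path and
// callback bodies are assumptions, not code from this package). Unlike the archive
// variant, process is invoked once per matched file with a raw, buffered reader:
//
//	extractor := NewRecursiveFilesystemExtractor(fs)
//	err := extractor.Extract(ctx,
//		[]string{"/data/artifacts"}, // hypothetical directory
//		nil, // no masks: every regular file under the directory is scraped
//		func(ctx context.Context, object *Object) error {
//			// stream object.Data to storage under object.Name
//			return nil
//		},
//		func(ctx context.Context, path string) error { return nil },
//	)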