github.com/kubeshop/testkube@v1.17.23/pkg/executor/scraper/filesystem_extractor.go (about) 1 package scraper 2 3 import ( 4 "bytes" 5 "context" 6 "encoding/json" 7 "io" 8 "os" 9 "path/filepath" 10 "regexp" 11 "strings" 12 13 "github.com/kubeshop/testkube/pkg/archive" 14 "github.com/kubeshop/testkube/pkg/log" 15 16 "github.com/kubeshop/testkube/pkg/filesystem" 17 18 "github.com/pkg/errors" 19 ) 20 21 const ( 22 defaultTarballName = "artifacts.tar.gz" 23 defaultTarballMetaName = ".testkube-meta-files.json" 24 ) 25 26 type ArchiveFilesystemExtractor struct { 27 generateMeta bool 28 fs filesystem.FileSystem 29 } 30 31 func NewArchiveFilesystemExtractor(fs filesystem.FileSystem, opts ...ArchiveFilesystemExtractorOpts) *ArchiveFilesystemExtractor { 32 r := &ArchiveFilesystemExtractor{fs: fs} 33 34 for _, opt := range opts { 35 opt(r) 36 } 37 38 return r 39 } 40 41 type ArchiveFilesystemExtractorOpts func(*ArchiveFilesystemExtractor) 42 43 func GenerateTarballMetaFile() ArchiveFilesystemExtractorOpts { 44 return func(a *ArchiveFilesystemExtractor) { 45 a.generateMeta = true 46 } 47 } 48 49 func (e *ArchiveFilesystemExtractor) Extract(ctx context.Context, paths, masks []string, process ProcessFn, notify NotifyFn) error { 50 var archiveFiles []*archive.File 51 for _, dir := range paths { 52 log.DefaultLogger.Infof("scraping artifacts in directory: %v", dir) 53 54 if _, err := e.fs.Stat(dir); os.IsNotExist(err) { 55 log.DefaultLogger.Warnf("skipping directory %s because it does not exist", dir) 56 continue 57 } 58 59 err := e.fs.Walk( 60 dir, 61 func(path string, fileInfo os.FileInfo, err error) error { 62 log.DefaultLogger.Debugf("checking path %s", path) 63 if err != nil { 64 return errors.Wrap(err, "walk function returned a special error") 65 } 66 67 if fileInfo.IsDir() { 68 log.DefaultLogger.Debugf("skipping directory %s", path) 69 return nil 70 } 71 72 var regexps []*regexp.Regexp 73 for _, mask := range masks { 74 values := strings.Split(mask, ",") 75 for _, value := range values { 76 re, err := regexp.Compile(value) 77 if err != nil { 78 return errors.Wrap(err, "regexp compilation error") 79 } 80 81 regexps = append(regexps, re) 82 } 83 } 84 85 found := len(regexps) == 0 86 for i := range regexps { 87 if found = regexps[i].MatchString(path); found { 88 break 89 } 90 } 91 92 if !found { 93 return nil 94 } 95 96 if err := notify(ctx, path); err != nil { 97 log.DefaultLogger.Warnf("error notifying for file %s", path) 98 } 99 100 archiveFile, err := e.newArchiveFile(dir, path) 101 if err != nil { 102 return errors.Wrapf(err, "error creating archive file for path %s", path) 103 } 104 archiveFiles = append(archiveFiles, archiveFile) 105 106 return nil 107 }, 108 ) 109 110 if err != nil { 111 return errors.Wrapf(err, "error walking directory %s", dir) 112 } 113 } 114 115 if len(archiveFiles) == 0 { 116 log.DefaultLogger.Infof("skipping tarball creation because no files were scraped") 117 return nil 118 } 119 120 tarballService := archive.NewTarballService() 121 var artifactsTarball bytes.Buffer 122 log.DefaultLogger.Infof("creating artifacts tarball with %d files", len(archiveFiles)) 123 if err := tarballService.Create(&artifactsTarball, archiveFiles); err != nil { 124 return errors.Wrapf(err, "error creating tarball") 125 } 126 127 object := &Object{ 128 Name: defaultTarballName, 129 Size: int64(artifactsTarball.Len()), 130 Data: &artifactsTarball, 131 DataType: DataTypeTarball, 132 } 133 if err := process(ctx, object); err != nil { 134 return errors.Wrapf(err, "error processing object %s", object.Name) 135 } 136 137 if e.generateMeta { 138 tarballMeta, err := e.newTarballMeta(archiveFiles) 139 if err != nil { 140 return errors.Wrapf(err, "error creating tarball meta") 141 } 142 if err := process(ctx, tarballMeta); err != nil { 143 return errors.Wrapf(err, "error processing object %s", tarballMeta.Name) 144 } 145 } 146 147 return nil 148 } 149 150 func (e *ArchiveFilesystemExtractor) newTarballMeta(files []*archive.File) (*Object, error) { 151 var stats []*FileStat 152 for _, f := range files { 153 stats = append(stats, &FileStat{ 154 Name: f.Name, 155 Size: f.Size, 156 }) 157 } 158 meta := &FilesMeta{ 159 Files: stats, 160 DataType: DataTypeTarball, 161 Archive: defaultTarballName, 162 } 163 jsonMeta, err := json.Marshal(meta) 164 if err != nil { 165 return nil, err 166 } 167 168 return &Object{ 169 Name: defaultTarballMetaName, 170 Size: int64(len(jsonMeta)), 171 Data: bytes.NewReader(jsonMeta), 172 DataType: DataTypeRaw, 173 }, nil 174 } 175 176 func (e *ArchiveFilesystemExtractor) newArchiveFile(baseDir string, path string) (*archive.File, error) { 177 f, err := e.fs.OpenFileBuffered(path) 178 if err != nil { 179 return nil, errors.Wrapf(err, "error opening file %s", path) 180 } 181 182 stat, err := e.fs.Stat(path) 183 if err != nil { 184 return nil, errors.Wrapf(err, "error getting file stat %s", path) 185 } 186 187 relpath, err := filepath.Rel(baseDir, path) 188 if err != nil { 189 return nil, errors.Wrapf(err, "error getting relative path for %s", path) 190 } 191 if relpath == "." { 192 relpath = stat.Name() 193 } 194 195 archiveFile := archive.File{ 196 Name: relpath, 197 Size: stat.Size(), 198 Mode: int64(stat.Mode()), 199 ModTime: stat.ModTime(), 200 Data: &bytes.Buffer{}, 201 } 202 n, err := io.Copy(archiveFile.Data, f) 203 if err != nil { 204 return nil, errors.Wrapf(err, "error copying file %s data to tarball", path) 205 } 206 if n != stat.Size() { 207 return nil, errors.Errorf("error copying file %s data to tarball, expected %d bytes, got %d", path, stat.Size(), n) 208 } 209 210 return &archiveFile, nil 211 } 212 213 var _ Extractor = (*ArchiveFilesystemExtractor)(nil) 214 215 type RecursiveFilesystemExtractor struct { 216 fs filesystem.FileSystem 217 } 218 219 func NewRecursiveFilesystemExtractor(fs filesystem.FileSystem) *RecursiveFilesystemExtractor { 220 return &RecursiveFilesystemExtractor{fs: fs} 221 } 222 223 func (e *RecursiveFilesystemExtractor) Extract(ctx context.Context, paths, masks []string, process ProcessFn, notify NotifyFn) error { 224 for _, dir := range paths { 225 log.DefaultLogger.Infof("scraping artifacts in directory: %v", dir) 226 227 if _, err := e.fs.Stat(dir); os.IsNotExist(err) { 228 log.DefaultLogger.Warnf("skipping directory %s because it does not exist", dir) 229 continue 230 } 231 232 err := e.fs.Walk( 233 dir, 234 func(path string, fileInfo os.FileInfo, err error) error { 235 log.DefaultLogger.Debugf("checking path %s", path) 236 if err != nil { 237 return errors.Wrap(err, "walk function returned a special error") 238 } 239 240 if fileInfo.IsDir() { 241 log.DefaultLogger.Infof("skipping directory %s", path) 242 return nil 243 } 244 245 var regexps []*regexp.Regexp 246 for _, mask := range masks { 247 values := strings.Split(mask, ",") 248 for _, value := range values { 249 re, err := regexp.Compile(value) 250 if err != nil { 251 return errors.Wrap(err, "regexp compilation error") 252 } 253 254 regexps = append(regexps, re) 255 } 256 } 257 258 found := len(regexps) == 0 259 for i := range regexps { 260 if found = regexps[i].MatchString(path); found { 261 break 262 } 263 } 264 265 if !found { 266 return nil 267 } 268 269 if err := notify(ctx, path); err != nil { 270 log.DefaultLogger.Warnf("error notifying for file %s", path) 271 } 272 273 reader, err := e.fs.OpenFileBuffered(path) 274 if err != nil { 275 return errors.Wrapf(err, "error opening buffered %s", path) 276 } 277 relpath, err := filepath.Rel(dir, path) 278 if err != nil { 279 return errors.Wrapf(err, "error getting relative path for %s", path) 280 } 281 if relpath == "." { 282 relpath = fileInfo.Name() 283 } 284 object := &Object{ 285 Name: relpath, 286 Size: fileInfo.Size(), 287 Data: reader, 288 DataType: DataTypeRaw, 289 } 290 log.DefaultLogger.Infof("filesystem extractor is sending file to be processed: %v", object.Name) 291 if err := process(ctx, object); err != nil { 292 return errors.Wrapf(err, "failed to process file %s", object.Name) 293 } 294 295 return nil 296 }) 297 if err != nil { 298 return errors.Wrapf(err, "failed to walk directory %s", dir) 299 } 300 } 301 302 return nil 303 } 304 305 var _ Extractor = (*RecursiveFilesystemExtractor)(nil)