github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/local/walker.go (about)

     1  package local
     2  
     3  import (
     4  	"context"
     5  	"crypto/md5" //nolint:gosec
     6  	"encoding/hex"
     7  	"encoding/json"
     8  	"io"
     9  	"io/fs"
    10  	"net/url"
    11  	"os"
    12  	"path"
    13  	"path/filepath"
    14  	"sort"
    15  	"strings"
    16  
    17  	gonanoid "github.com/matoous/go-nanoid/v2"
    18  	"github.com/treeverse/lakefs/pkg/block"
    19  	"github.com/treeverse/lakefs/pkg/block/params"
    20  )
    21  
// cacheDirName is the name of the directory, created under the adapter's base path,
// in which the walker keeps its scan-result cache files.
const cacheDirName = "_lakefs_cache"
    23  
// Walker lists files of a local directory tree as object store entries.
type Walker struct {
	// mark is the walk position reported by Marker.
	mark block.Mark
	// importHidden controls whether files and directories whose name starts with "." are scanned.
	importHidden bool
	// allowedPrefixes is passed to VerifyAbsPath when validating the walk root.
	allowedPrefixes []string
	// cacheLocation is the directory holding cached scan results; empty when no base path is configured.
	cacheLocation string
	// path is the adapter base path; it is passed to VerifyAbsPath when validating the walk root.
	path string
}
    31  
    32  func NewLocalWalker(params params.Local) *Walker {
    33  	// without Path, we do not keep cache - will make walker very slow
    34  	var cacheLocation string
    35  	if params.Path != "" {
    36  		cacheLocation = filepath.Join(params.Path, cacheDirName)
    37  	}
    38  	return &Walker{
    39  		mark:            block.Mark{HasMore: true},
    40  		importHidden:    params.ImportHidden,
    41  		allowedPrefixes: params.AllowedExternalPrefixes,
    42  		cacheLocation:   cacheLocation,
    43  		path:            params.Path,
    44  	}
    45  }
    46  
    47  func (l *Walker) Walk(_ context.Context, storageURI *url.URL, options block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error {
    48  	if storageURI.Scheme != "local" {
    49  		return path.ErrBadPattern
    50  	}
    51  	root := path.Join(storageURI.Host, storageURI.Path)
    52  	if err := VerifyAbsPath(root, l.path, l.allowedPrefixes); err != nil {
    53  		return err
    54  	}
    55  
    56  	var entries []*block.ObjectStoreEntry
    57  	// verify and use cache - location is stored in continuation token
    58  	if options.ContinuationToken != "" && strings.HasPrefix(options.ContinuationToken, l.cacheLocation) {
    59  		cacheData, err := os.ReadFile(options.ContinuationToken)
    60  		if err == nil {
    61  			err = json.Unmarshal(cacheData, &entries)
    62  			if err != nil {
    63  				entries = nil
    64  			} else {
    65  				l.mark.ContinuationToken = options.ContinuationToken
    66  			}
    67  		}
    68  	}
    69  
    70  	// if needed scan all entries to import and calc etag
    71  	if entries == nil {
    72  		var err error
    73  		entries, err = l.scanEntries(root, options)
    74  		if err != nil {
    75  			return err
    76  		}
    77  
    78  		// store entries to cache file
    79  		if l.cacheLocation != "" {
    80  			jsonData, err := json.Marshal(entries)
    81  			if err != nil {
    82  				return err
    83  			}
    84  			const dirPerm = 0o755
    85  			_ = os.MkdirAll(l.cacheLocation, dirPerm)
    86  			cacheName := filepath.Join(l.cacheLocation, gonanoid.Must()+"-import.json")
    87  			const cachePerm = 0o644
    88  			if err := os.WriteFile(cacheName, jsonData, cachePerm); err != nil {
    89  				_ = os.Remove(cacheName)
    90  				return err
    91  			}
    92  			l.mark.ContinuationToken = cacheName
    93  		}
    94  	}
    95  
    96  	// search start position base on Last key
    97  	startIndex := sort.Search(len(entries), func(i int) bool {
    98  		return entries[i].FullKey > options.After
    99  	})
   100  	for i := startIndex; i < len(entries); i++ {
   101  		ent := *entries[i]
   102  		etag, err := calcFileETag(ent)
   103  		if err != nil {
   104  			return err
   105  		}
   106  
   107  		ent.ETag = etag
   108  		l.mark.LastKey = ent.FullKey
   109  		if err := walkFn(ent); err != nil {
   110  			return err
   111  		}
   112  	}
   113  	// delete cache in case we completed the iteration
   114  	if l.mark.ContinuationToken != "" {
   115  		if err := os.Remove(l.mark.ContinuationToken); err != nil {
   116  			return err
   117  		}
   118  	}
   119  	l.mark = block.Mark{}
   120  	return nil
   121  }
   122  
   123  func (l *Walker) scanEntries(root string, options block.WalkOptions) ([]*block.ObjectStoreEntry, error) {
   124  	var entries []*block.ObjectStoreEntry
   125  	if err := filepath.Walk(root, func(p string, info fs.FileInfo, err error) error {
   126  		if err != nil {
   127  			return err
   128  		}
   129  
   130  		// skip hidden files and directories
   131  		if !l.importHidden && strings.HasPrefix(info.Name(), ".") {
   132  			if info.IsDir() {
   133  				return fs.SkipDir
   134  			}
   135  			return nil
   136  		}
   137  
   138  		key := filepath.ToSlash(p)
   139  		if key < options.After {
   140  			return nil
   141  		}
   142  		if !info.Mode().IsRegular() {
   143  			return nil
   144  		}
   145  
   146  		addr := "local://" + key
   147  		relativePath, err := filepath.Rel(root, p)
   148  		if err != nil {
   149  			return err
   150  		}
   151  		// etag is calculated during iteration
   152  		ent := &block.ObjectStoreEntry{
   153  			FullKey:     key,
   154  			RelativeKey: filepath.ToSlash(relativePath),
   155  			Address:     addr,
   156  			Mtime:       info.ModTime(),
   157  			Size:        info.Size(),
   158  		}
   159  		entries = append(entries, ent)
   160  		return nil
   161  	}); err != nil {
   162  		return nil, err
   163  	}
   164  	sort.Slice(entries, func(i, j int) bool {
   165  		return entries[i].FullKey < entries[j].FullKey
   166  	})
   167  	return entries, nil
   168  }
   169  
   170  func calcFileETag(ent block.ObjectStoreEntry) (string, error) {
   171  	f, err := os.Open(ent.FullKey)
   172  	if err != nil {
   173  		return "", err
   174  	}
   175  	defer func() { _ = f.Close() }()
   176  	hash := md5.New() //nolint:gosec
   177  	_, err = io.Copy(hash, f)
   178  	if err != nil {
   179  		return "", err
   180  	}
   181  	etag := hex.EncodeToString(hash.Sum(nil))
   182  	return etag, nil
   183  }
   184  
// Marker returns the walker's current mark.
func (l *Walker) Marker() block.Mark {
	return l.mark
}
   188  
// GetSkippedEntries always returns nil for the local walker.
func (l *Walker) GetSkippedEntries() []block.ObjectStoreEntry {
	return nil
}