github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/block/local/walker.go (about) 1 package local 2 3 import ( 4 "context" 5 "crypto/md5" //nolint:gosec 6 "encoding/hex" 7 "encoding/json" 8 "io" 9 "io/fs" 10 "net/url" 11 "os" 12 "path" 13 "path/filepath" 14 "sort" 15 "strings" 16 17 gonanoid "github.com/matoous/go-nanoid/v2" 18 "github.com/treeverse/lakefs/pkg/block" 19 "github.com/treeverse/lakefs/pkg/block/params" 20 ) 21 22 const cacheDirName = "_lakefs_cache" 23 24 type Walker struct { 25 mark block.Mark 26 importHidden bool 27 allowedPrefixes []string 28 cacheLocation string 29 path string 30 } 31 32 func NewLocalWalker(params params.Local) *Walker { 33 // without Path, we do not keep cache - will make walker very slow 34 var cacheLocation string 35 if params.Path != "" { 36 cacheLocation = filepath.Join(params.Path, cacheDirName) 37 } 38 return &Walker{ 39 mark: block.Mark{HasMore: true}, 40 importHidden: params.ImportHidden, 41 allowedPrefixes: params.AllowedExternalPrefixes, 42 cacheLocation: cacheLocation, 43 path: params.Path, 44 } 45 } 46 47 func (l *Walker) Walk(_ context.Context, storageURI *url.URL, options block.WalkOptions, walkFn func(e block.ObjectStoreEntry) error) error { 48 if storageURI.Scheme != "local" { 49 return path.ErrBadPattern 50 } 51 root := path.Join(storageURI.Host, storageURI.Path) 52 if err := VerifyAbsPath(root, l.path, l.allowedPrefixes); err != nil { 53 return err 54 } 55 56 var entries []*block.ObjectStoreEntry 57 // verify and use cache - location is stored in continuation token 58 if options.ContinuationToken != "" && strings.HasPrefix(options.ContinuationToken, l.cacheLocation) { 59 cacheData, err := os.ReadFile(options.ContinuationToken) 60 if err == nil { 61 err = json.Unmarshal(cacheData, &entries) 62 if err != nil { 63 entries = nil 64 } else { 65 l.mark.ContinuationToken = options.ContinuationToken 66 } 67 } 68 } 69 70 // if needed scan all entries to import and calc etag 71 if entries == nil { 72 var err error 73 entries, err = l.scanEntries(root, options) 74 if err != nil { 75 return err 76 } 77 78 // store entries to cache file 79 if l.cacheLocation != "" { 80 jsonData, err := json.Marshal(entries) 81 if err != nil { 82 return err 83 } 84 const dirPerm = 0o755 85 _ = os.MkdirAll(l.cacheLocation, dirPerm) 86 cacheName := filepath.Join(l.cacheLocation, gonanoid.Must()+"-import.json") 87 const cachePerm = 0o644 88 if err := os.WriteFile(cacheName, jsonData, cachePerm); err != nil { 89 _ = os.Remove(cacheName) 90 return err 91 } 92 l.mark.ContinuationToken = cacheName 93 } 94 } 95 96 // search start position base on Last key 97 startIndex := sort.Search(len(entries), func(i int) bool { 98 return entries[i].FullKey > options.After 99 }) 100 for i := startIndex; i < len(entries); i++ { 101 ent := *entries[i] 102 etag, err := calcFileETag(ent) 103 if err != nil { 104 return err 105 } 106 107 ent.ETag = etag 108 l.mark.LastKey = ent.FullKey 109 if err := walkFn(ent); err != nil { 110 return err 111 } 112 } 113 // delete cache in case we completed the iteration 114 if l.mark.ContinuationToken != "" { 115 if err := os.Remove(l.mark.ContinuationToken); err != nil { 116 return err 117 } 118 } 119 l.mark = block.Mark{} 120 return nil 121 } 122 123 func (l *Walker) scanEntries(root string, options block.WalkOptions) ([]*block.ObjectStoreEntry, error) { 124 var entries []*block.ObjectStoreEntry 125 if err := filepath.Walk(root, func(p string, info fs.FileInfo, err error) error { 126 if err != nil { 127 return err 128 } 129 130 // skip hidden files and directories 131 if !l.importHidden && strings.HasPrefix(info.Name(), ".") { 132 if info.IsDir() { 133 return fs.SkipDir 134 } 135 return nil 136 } 137 138 key := filepath.ToSlash(p) 139 if key < options.After { 140 return nil 141 } 142 if !info.Mode().IsRegular() { 143 return nil 144 } 145 146 addr := "local://" + key 147 relativePath, err := filepath.Rel(root, p) 148 if err != nil { 149 return err 150 } 151 // etag is calculated during iteration 152 ent := &block.ObjectStoreEntry{ 153 FullKey: key, 154 RelativeKey: filepath.ToSlash(relativePath), 155 Address: addr, 156 Mtime: info.ModTime(), 157 Size: info.Size(), 158 } 159 entries = append(entries, ent) 160 return nil 161 }); err != nil { 162 return nil, err 163 } 164 sort.Slice(entries, func(i, j int) bool { 165 return entries[i].FullKey < entries[j].FullKey 166 }) 167 return entries, nil 168 } 169 170 func calcFileETag(ent block.ObjectStoreEntry) (string, error) { 171 f, err := os.Open(ent.FullKey) 172 if err != nil { 173 return "", err 174 } 175 defer func() { _ = f.Close() }() 176 hash := md5.New() //nolint:gosec 177 _, err = io.Copy(hash, f) 178 if err != nil { 179 return "", err 180 } 181 etag := hex.EncodeToString(hash.Sum(nil)) 182 return etag, nil 183 } 184 185 func (l *Walker) Marker() block.Mark { 186 return l.mark 187 } 188 189 func (l *Walker) GetSkippedEntries() []block.ObjectStoreEntry { 190 return nil 191 }