github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/hdfs/fs.go (about) 1 //go:build !plan9 2 3 package hdfs 4 5 import ( 6 "context" 7 "fmt" 8 "io" 9 "os" 10 "os/user" 11 "path" 12 "strings" 13 "time" 14 15 "github.com/colinmarc/hdfs/v2" 16 krb "github.com/jcmturner/gokrb5/v8/client" 17 "github.com/jcmturner/gokrb5/v8/config" 18 "github.com/jcmturner/gokrb5/v8/credentials" 19 "github.com/rclone/rclone/fs" 20 "github.com/rclone/rclone/fs/config/configmap" 21 "github.com/rclone/rclone/fs/config/configstruct" 22 "github.com/rclone/rclone/fs/hash" 23 "github.com/rclone/rclone/lib/pacer" 24 ) 25 26 // Fs represents a HDFS server 27 type Fs struct { 28 name string 29 root string 30 features *fs.Features // optional features 31 opt Options // options for this backend 32 ci *fs.ConfigInfo // global config 33 client *hdfs.Client 34 pacer *fs.Pacer // pacer for API calls 35 } 36 37 const ( 38 minSleep = 20 * time.Millisecond 39 maxSleep = 10 * time.Second 40 decayConstant = 2 // bigger for slower decay, exponential 41 ) 42 43 // copy-paste from https://github.com/colinmarc/hdfs/blob/master/cmd/hdfs/kerberos.go 44 func getKerberosClient() (*krb.Client, error) { 45 configPath := os.Getenv("KRB5_CONFIG") 46 if configPath == "" { 47 configPath = "/etc/krb5.conf" 48 } 49 50 cfg, err := config.Load(configPath) 51 if err != nil { 52 return nil, err 53 } 54 55 // Determine the ccache location from the environment, falling back to the 56 // default location. 57 ccachePath := os.Getenv("KRB5CCNAME") 58 if strings.Contains(ccachePath, ":") { 59 if strings.HasPrefix(ccachePath, "FILE:") { 60 ccachePath = strings.SplitN(ccachePath, ":", 2)[1] 61 } else { 62 return nil, fmt.Errorf("unusable ccache: %s", ccachePath) 63 } 64 } else if ccachePath == "" { 65 u, err := user.Current() 66 if err != nil { 67 return nil, err 68 } 69 70 ccachePath = fmt.Sprintf("/tmp/krb5cc_%s", u.Uid) 71 } 72 73 ccache, err := credentials.LoadCCache(ccachePath) 74 if err != nil { 75 return nil, err 76 } 77 78 client, err := krb.NewFromCCache(ccache, cfg) 79 if err != nil { 80 return nil, err 81 } 82 83 return client, nil 84 } 85 86 // NewFs constructs an Fs from the path 87 func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) { 88 opt := new(Options) 89 err := configstruct.Set(m, opt) 90 if err != nil { 91 return nil, err 92 } 93 94 options := hdfs.ClientOptions{ 95 Addresses: opt.Namenode, 96 UseDatanodeHostname: false, 97 } 98 99 if opt.ServicePrincipalName != "" { 100 options.KerberosClient, err = getKerberosClient() 101 if err != nil { 102 return nil, fmt.Errorf("problem with kerberos authentication: %w", err) 103 } 104 options.KerberosServicePrincipleName = opt.ServicePrincipalName 105 106 if opt.DataTransferProtection != "" { 107 options.DataTransferProtection = opt.DataTransferProtection 108 } 109 } else { 110 options.User = opt.Username 111 } 112 113 client, err := hdfs.NewClient(options) 114 if err != nil { 115 return nil, err 116 } 117 118 f := &Fs{ 119 name: name, 120 root: root, 121 opt: *opt, 122 ci: fs.GetConfig(ctx), 123 client: client, 124 pacer: fs.NewPacer(ctx, pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))), 125 } 126 127 f.features = (&fs.Features{ 128 CanHaveEmptyDirectories: true, 129 }).Fill(ctx, f) 130 131 info, err := f.client.Stat(f.realpath("")) 132 if err == nil && !info.IsDir() { 133 f.root = path.Dir(f.root) 134 return f, fs.ErrorIsFile 135 } 136 137 return f, nil 138 } 139 140 // Name of this fs 141 func (f *Fs) Name() string { 142 return f.name 143 } 144 145 // Root of the remote (as passed into NewFs) 146 func (f *Fs) Root() string { 147 return f.root 148 } 149 150 // String returns a description of the FS 151 func (f *Fs) String() string { 152 return fmt.Sprintf("hdfs://%s/%s", f.opt.Namenode, f.root) 153 } 154 155 // Features returns the optional features of this Fs 156 func (f *Fs) Features() *fs.Features { 157 return f.features 158 } 159 160 // Precision return the precision of this Fs 161 func (f *Fs) Precision() time.Duration { 162 return time.Second 163 } 164 165 // Hashes are not supported 166 func (f *Fs) Hashes() hash.Set { 167 return hash.Set(hash.None) 168 } 169 170 // NewObject finds file at remote or return fs.ErrorObjectNotFound 171 func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { 172 realpath := f.realpath(remote) 173 fs.Debugf(f, "new [%s]", realpath) 174 175 info, err := f.ensureFile(realpath) 176 if err != nil { 177 return nil, err 178 } 179 180 return &Object{ 181 fs: f, 182 remote: remote, 183 size: info.Size(), 184 modTime: info.ModTime(), 185 }, nil 186 } 187 188 // List the objects and directories in dir into entries. 189 func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) { 190 realpath := f.realpath(dir) 191 fs.Debugf(f, "list [%s]", realpath) 192 193 err = f.ensureDirectory(realpath) 194 if err != nil { 195 return nil, err 196 } 197 198 list, err := f.client.ReadDir(realpath) 199 if err != nil { 200 return nil, err 201 } 202 for _, x := range list { 203 stdName := f.opt.Enc.ToStandardName(x.Name()) 204 remote := path.Join(dir, stdName) 205 if x.IsDir() { 206 entries = append(entries, fs.NewDir(remote, x.ModTime())) 207 } else { 208 entries = append(entries, &Object{ 209 fs: f, 210 remote: remote, 211 size: x.Size(), 212 modTime: x.ModTime(), 213 }) 214 } 215 } 216 return entries, nil 217 } 218 219 // Put the object 220 func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { 221 o := &Object{ 222 fs: f, 223 remote: src.Remote(), 224 } 225 err := o.Update(ctx, in, src, options...) 226 return o, err 227 } 228 229 // PutStream uploads to the remote path with the modTime given of indeterminate size 230 func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { 231 return f.Put(ctx, in, src, options...) 232 } 233 234 // Mkdir makes a directory 235 func (f *Fs) Mkdir(ctx context.Context, dir string) error { 236 fs.Debugf(f, "mkdir [%s]", f.realpath(dir)) 237 return f.client.MkdirAll(f.realpath(dir), 0755) 238 } 239 240 // Rmdir deletes the directory 241 func (f *Fs) Rmdir(ctx context.Context, dir string) error { 242 realpath := f.realpath(dir) 243 fs.Debugf(f, "rmdir [%s]", realpath) 244 245 err := f.ensureDirectory(realpath) 246 if err != nil { 247 return err 248 } 249 250 // do not remove empty directory 251 list, err := f.client.ReadDir(realpath) 252 if err != nil { 253 return err 254 } 255 if len(list) > 0 { 256 return fs.ErrorDirectoryNotEmpty 257 } 258 259 return f.client.Remove(realpath) 260 } 261 262 // Purge deletes all the files in the directory 263 func (f *Fs) Purge(ctx context.Context, dir string) error { 264 realpath := f.realpath(dir) 265 fs.Debugf(f, "purge [%s]", realpath) 266 267 err := f.ensureDirectory(realpath) 268 if err != nil { 269 return err 270 } 271 272 return f.client.RemoveAll(realpath) 273 } 274 275 // Move src to this remote using server-side move operations. 276 // 277 // This is stored with the remote path given. 278 // 279 // It returns the destination Object and a possible error. 280 // 281 // Will only be called if src.Fs().Name() == f.Name() 282 // 283 // If it isn't possible then return fs.ErrorCantMove 284 func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { 285 srcObj, ok := src.(*Object) 286 if !ok { 287 fs.Debugf(src, "Can't move - not same remote type") 288 return nil, fs.ErrorCantMove 289 } 290 291 // Get the real paths from the remote specs: 292 sourcePath := srcObj.fs.realpath(srcObj.remote) 293 targetPath := f.realpath(remote) 294 fs.Debugf(f, "rename [%s] to [%s]", sourcePath, targetPath) 295 296 // Make sure the target folder exists: 297 dirname := path.Dir(targetPath) 298 err := f.client.MkdirAll(dirname, 0755) 299 if err != nil { 300 return nil, err 301 } 302 303 // Do the move 304 // Note that the underlying HDFS library hard-codes Overwrite=True, but this is expected rclone behaviour. 305 err = f.client.Rename(sourcePath, targetPath) 306 if err != nil { 307 return nil, err 308 } 309 310 // Look up the resulting object 311 info, err := f.client.Stat(targetPath) 312 if err != nil { 313 return nil, err 314 } 315 316 // And return it: 317 return &Object{ 318 fs: f, 319 remote: remote, 320 size: info.Size(), 321 modTime: info.ModTime(), 322 }, nil 323 } 324 325 // DirMove moves src, srcRemote to this remote at dstRemote 326 // using server-side move operations. 327 // 328 // Will only be called if src.Fs().Name() == f.Name() 329 // 330 // If it isn't possible then return fs.ErrorCantDirMove 331 // 332 // If destination exists then return fs.ErrorDirExists 333 func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) (err error) { 334 srcFs, ok := src.(*Fs) 335 if !ok { 336 return fs.ErrorCantDirMove 337 } 338 339 // Get the real paths from the remote specs: 340 sourcePath := srcFs.realpath(srcRemote) 341 targetPath := f.realpath(dstRemote) 342 fs.Debugf(f, "rename [%s] to [%s]", sourcePath, targetPath) 343 344 // Check if the destination exists: 345 info, err := f.client.Stat(targetPath) 346 if err == nil { 347 fs.Debugf(f, "target directory already exits, IsDir = [%t]", info.IsDir()) 348 return fs.ErrorDirExists 349 } 350 351 // Make sure the targets parent folder exists: 352 dirname := path.Dir(targetPath) 353 err = f.client.MkdirAll(dirname, 0755) 354 if err != nil { 355 return err 356 } 357 358 // Do the move 359 err = f.client.Rename(sourcePath, targetPath) 360 if err != nil { 361 return err 362 } 363 364 return nil 365 } 366 367 // About gets quota information from the Fs 368 func (f *Fs) About(ctx context.Context) (*fs.Usage, error) { 369 info, err := f.client.StatFs() 370 if err != nil { 371 return nil, err 372 } 373 return &fs.Usage{ 374 Total: fs.NewUsageValue(int64(info.Capacity)), 375 Used: fs.NewUsageValue(int64(info.Used)), 376 Free: fs.NewUsageValue(int64(info.Remaining)), 377 }, nil 378 } 379 380 func (f *Fs) ensureDirectory(realpath string) error { 381 info, err := f.client.Stat(realpath) 382 383 if e, ok := err.(*os.PathError); ok && e.Err == os.ErrNotExist { 384 return fs.ErrorDirNotFound 385 } 386 if err != nil { 387 return err 388 } 389 if !info.IsDir() { 390 return fs.ErrorDirNotFound 391 } 392 393 return nil 394 } 395 396 func (f *Fs) ensureFile(realpath string) (os.FileInfo, error) { 397 info, err := f.client.Stat(realpath) 398 399 if e, ok := err.(*os.PathError); ok && e.Err == os.ErrNotExist { 400 return nil, fs.ErrorObjectNotFound 401 } 402 if err != nil { 403 return nil, err 404 } 405 if info.IsDir() { 406 return nil, fs.ErrorObjectNotFound 407 } 408 409 return info, nil 410 } 411 412 func (f *Fs) realpath(dir string) string { 413 return f.opt.Enc.FromStandardPath(xPath(f.Root(), dir)) 414 } 415 416 // Check the interfaces are satisfied 417 var ( 418 _ fs.Fs = (*Fs)(nil) 419 _ fs.Purger = (*Fs)(nil) 420 _ fs.PutStreamer = (*Fs)(nil) 421 _ fs.Abouter = (*Fs)(nil) 422 _ fs.Mover = (*Fs)(nil) 423 _ fs.DirMover = (*Fs)(nil) 424 )