github.com/rclone/rclone@v1.66.1-0.20240517100346-7b89735ae726/backend/hdfs/fs.go (about)

     1  //go:build !plan9
     2  
     3  package hdfs
     4  
     5  import (
     6  	"context"
     7  	"fmt"
     8  	"io"
     9  	"os"
    10  	"os/user"
    11  	"path"
    12  	"strings"
    13  	"time"
    14  
    15  	"github.com/colinmarc/hdfs/v2"
    16  	krb "github.com/jcmturner/gokrb5/v8/client"
    17  	"github.com/jcmturner/gokrb5/v8/config"
    18  	"github.com/jcmturner/gokrb5/v8/credentials"
    19  	"github.com/rclone/rclone/fs"
    20  	"github.com/rclone/rclone/fs/config/configmap"
    21  	"github.com/rclone/rclone/fs/config/configstruct"
    22  	"github.com/rclone/rclone/fs/hash"
    23  	"github.com/rclone/rclone/lib/pacer"
    24  )
    25  
    26  // Fs represents a HDFS server
    27  type Fs struct {
    28  	name     string
    29  	root     string
    30  	features *fs.Features   // optional features
    31  	opt      Options        // options for this backend
    32  	ci       *fs.ConfigInfo // global config
    33  	client   *hdfs.Client
    34  	pacer    *fs.Pacer // pacer for API calls
    35  }
    36  
    37  const (
    38  	minSleep      = 20 * time.Millisecond
    39  	maxSleep      = 10 * time.Second
    40  	decayConstant = 2 // bigger for slower decay, exponential
    41  )
    42  
    43  // copy-paste from https://github.com/colinmarc/hdfs/blob/master/cmd/hdfs/kerberos.go
    44  func getKerberosClient() (*krb.Client, error) {
    45  	configPath := os.Getenv("KRB5_CONFIG")
    46  	if configPath == "" {
    47  		configPath = "/etc/krb5.conf"
    48  	}
    49  
    50  	cfg, err := config.Load(configPath)
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  
    55  	// Determine the ccache location from the environment, falling back to the
    56  	// default location.
    57  	ccachePath := os.Getenv("KRB5CCNAME")
    58  	if strings.Contains(ccachePath, ":") {
    59  		if strings.HasPrefix(ccachePath, "FILE:") {
    60  			ccachePath = strings.SplitN(ccachePath, ":", 2)[1]
    61  		} else {
    62  			return nil, fmt.Errorf("unusable ccache: %s", ccachePath)
    63  		}
    64  	} else if ccachePath == "" {
    65  		u, err := user.Current()
    66  		if err != nil {
    67  			return nil, err
    68  		}
    69  
    70  		ccachePath = fmt.Sprintf("/tmp/krb5cc_%s", u.Uid)
    71  	}
    72  
    73  	ccache, err := credentials.LoadCCache(ccachePath)
    74  	if err != nil {
    75  		return nil, err
    76  	}
    77  
    78  	client, err := krb.NewFromCCache(ccache, cfg)
    79  	if err != nil {
    80  		return nil, err
    81  	}
    82  
    83  	return client, nil
    84  }
    85  
    86  // NewFs constructs an Fs from the path
    87  func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) {
    88  	opt := new(Options)
    89  	err := configstruct.Set(m, opt)
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  
    94  	options := hdfs.ClientOptions{
    95  		Addresses:           opt.Namenode,
    96  		UseDatanodeHostname: false,
    97  	}
    98  
    99  	if opt.ServicePrincipalName != "" {
   100  		options.KerberosClient, err = getKerberosClient()
   101  		if err != nil {
   102  			return nil, fmt.Errorf("problem with kerberos authentication: %w", err)
   103  		}
   104  		options.KerberosServicePrincipleName = opt.ServicePrincipalName
   105  
   106  		if opt.DataTransferProtection != "" {
   107  			options.DataTransferProtection = opt.DataTransferProtection
   108  		}
   109  	} else {
   110  		options.User = opt.Username
   111  	}
   112  
   113  	client, err := hdfs.NewClient(options)
   114  	if err != nil {
   115  		return nil, err
   116  	}
   117  
   118  	f := &Fs{
   119  		name:   name,
   120  		root:   root,
   121  		opt:    *opt,
   122  		ci:     fs.GetConfig(ctx),
   123  		client: client,
   124  		pacer:  fs.NewPacer(ctx, pacer.NewDefault(pacer.MinSleep(minSleep), pacer.MaxSleep(maxSleep), pacer.DecayConstant(decayConstant))),
   125  	}
   126  
   127  	f.features = (&fs.Features{
   128  		CanHaveEmptyDirectories: true,
   129  	}).Fill(ctx, f)
   130  
   131  	info, err := f.client.Stat(f.realpath(""))
   132  	if err == nil && !info.IsDir() {
   133  		f.root = path.Dir(f.root)
   134  		return f, fs.ErrorIsFile
   135  	}
   136  
   137  	return f, nil
   138  }
   139  
   140  // Name of this fs
   141  func (f *Fs) Name() string {
   142  	return f.name
   143  }
   144  
   145  // Root of the remote (as passed into NewFs)
   146  func (f *Fs) Root() string {
   147  	return f.root
   148  }
   149  
   150  // String returns a description of the FS
   151  func (f *Fs) String() string {
   152  	return fmt.Sprintf("hdfs://%s/%s", f.opt.Namenode, f.root)
   153  }
   154  
   155  // Features returns the optional features of this Fs
   156  func (f *Fs) Features() *fs.Features {
   157  	return f.features
   158  }
   159  
   160  // Precision return the precision of this Fs
   161  func (f *Fs) Precision() time.Duration {
   162  	return time.Second
   163  }
   164  
   165  // Hashes are not supported
   166  func (f *Fs) Hashes() hash.Set {
   167  	return hash.Set(hash.None)
   168  }
   169  
   170  // NewObject finds file at remote or return fs.ErrorObjectNotFound
   171  func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) {
   172  	realpath := f.realpath(remote)
   173  	fs.Debugf(f, "new [%s]", realpath)
   174  
   175  	info, err := f.ensureFile(realpath)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  
   180  	return &Object{
   181  		fs:      f,
   182  		remote:  remote,
   183  		size:    info.Size(),
   184  		modTime: info.ModTime(),
   185  	}, nil
   186  }
   187  
   188  // List the objects and directories in dir into entries.
   189  func (f *Fs) List(ctx context.Context, dir string) (entries fs.DirEntries, err error) {
   190  	realpath := f.realpath(dir)
   191  	fs.Debugf(f, "list [%s]", realpath)
   192  
   193  	err = f.ensureDirectory(realpath)
   194  	if err != nil {
   195  		return nil, err
   196  	}
   197  
   198  	list, err := f.client.ReadDir(realpath)
   199  	if err != nil {
   200  		return nil, err
   201  	}
   202  	for _, x := range list {
   203  		stdName := f.opt.Enc.ToStandardName(x.Name())
   204  		remote := path.Join(dir, stdName)
   205  		if x.IsDir() {
   206  			entries = append(entries, fs.NewDir(remote, x.ModTime()))
   207  		} else {
   208  			entries = append(entries, &Object{
   209  				fs:      f,
   210  				remote:  remote,
   211  				size:    x.Size(),
   212  				modTime: x.ModTime(),
   213  			})
   214  		}
   215  	}
   216  	return entries, nil
   217  }
   218  
   219  // Put the object
   220  func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
   221  	o := &Object{
   222  		fs:     f,
   223  		remote: src.Remote(),
   224  	}
   225  	err := o.Update(ctx, in, src, options...)
   226  	return o, err
   227  }
   228  
   229  // PutStream uploads to the remote path with the modTime given of indeterminate size
   230  func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) {
   231  	return f.Put(ctx, in, src, options...)
   232  }
   233  
   234  // Mkdir makes a directory
   235  func (f *Fs) Mkdir(ctx context.Context, dir string) error {
   236  	fs.Debugf(f, "mkdir [%s]", f.realpath(dir))
   237  	return f.client.MkdirAll(f.realpath(dir), 0755)
   238  }
   239  
   240  // Rmdir deletes the directory
   241  func (f *Fs) Rmdir(ctx context.Context, dir string) error {
   242  	realpath := f.realpath(dir)
   243  	fs.Debugf(f, "rmdir [%s]", realpath)
   244  
   245  	err := f.ensureDirectory(realpath)
   246  	if err != nil {
   247  		return err
   248  	}
   249  
   250  	// do not remove empty directory
   251  	list, err := f.client.ReadDir(realpath)
   252  	if err != nil {
   253  		return err
   254  	}
   255  	if len(list) > 0 {
   256  		return fs.ErrorDirectoryNotEmpty
   257  	}
   258  
   259  	return f.client.Remove(realpath)
   260  }
   261  
   262  // Purge deletes all the files in the directory
   263  func (f *Fs) Purge(ctx context.Context, dir string) error {
   264  	realpath := f.realpath(dir)
   265  	fs.Debugf(f, "purge [%s]", realpath)
   266  
   267  	err := f.ensureDirectory(realpath)
   268  	if err != nil {
   269  		return err
   270  	}
   271  
   272  	return f.client.RemoveAll(realpath)
   273  }
   274  
   275  // Move src to this remote using server-side move operations.
   276  //
   277  // This is stored with the remote path given.
   278  //
   279  // It returns the destination Object and a possible error.
   280  //
   281  // Will only be called if src.Fs().Name() == f.Name()
   282  //
   283  // If it isn't possible then return fs.ErrorCantMove
   284  func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) {
   285  	srcObj, ok := src.(*Object)
   286  	if !ok {
   287  		fs.Debugf(src, "Can't move - not same remote type")
   288  		return nil, fs.ErrorCantMove
   289  	}
   290  
   291  	// Get the real paths from the remote specs:
   292  	sourcePath := srcObj.fs.realpath(srcObj.remote)
   293  	targetPath := f.realpath(remote)
   294  	fs.Debugf(f, "rename [%s] to [%s]", sourcePath, targetPath)
   295  
   296  	// Make sure the target folder exists:
   297  	dirname := path.Dir(targetPath)
   298  	err := f.client.MkdirAll(dirname, 0755)
   299  	if err != nil {
   300  		return nil, err
   301  	}
   302  
   303  	// Do the move
   304  	// Note that the underlying HDFS library hard-codes Overwrite=True, but this is expected rclone behaviour.
   305  	err = f.client.Rename(sourcePath, targetPath)
   306  	if err != nil {
   307  		return nil, err
   308  	}
   309  
   310  	// Look up the resulting object
   311  	info, err := f.client.Stat(targetPath)
   312  	if err != nil {
   313  		return nil, err
   314  	}
   315  
   316  	// And return it:
   317  	return &Object{
   318  		fs:      f,
   319  		remote:  remote,
   320  		size:    info.Size(),
   321  		modTime: info.ModTime(),
   322  	}, nil
   323  }
   324  
   325  // DirMove moves src, srcRemote to this remote at dstRemote
   326  // using server-side move operations.
   327  //
   328  // Will only be called if src.Fs().Name() == f.Name()
   329  //
   330  // If it isn't possible then return fs.ErrorCantDirMove
   331  //
   332  // If destination exists then return fs.ErrorDirExists
   333  func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) (err error) {
   334  	srcFs, ok := src.(*Fs)
   335  	if !ok {
   336  		return fs.ErrorCantDirMove
   337  	}
   338  
   339  	// Get the real paths from the remote specs:
   340  	sourcePath := srcFs.realpath(srcRemote)
   341  	targetPath := f.realpath(dstRemote)
   342  	fs.Debugf(f, "rename [%s] to [%s]", sourcePath, targetPath)
   343  
   344  	// Check if the destination exists:
   345  	info, err := f.client.Stat(targetPath)
   346  	if err == nil {
   347  		fs.Debugf(f, "target directory already exits, IsDir = [%t]", info.IsDir())
   348  		return fs.ErrorDirExists
   349  	}
   350  
   351  	// Make sure the targets parent folder exists:
   352  	dirname := path.Dir(targetPath)
   353  	err = f.client.MkdirAll(dirname, 0755)
   354  	if err != nil {
   355  		return err
   356  	}
   357  
   358  	// Do the move
   359  	err = f.client.Rename(sourcePath, targetPath)
   360  	if err != nil {
   361  		return err
   362  	}
   363  
   364  	return nil
   365  }
   366  
   367  // About gets quota information from the Fs
   368  func (f *Fs) About(ctx context.Context) (*fs.Usage, error) {
   369  	info, err := f.client.StatFs()
   370  	if err != nil {
   371  		return nil, err
   372  	}
   373  	return &fs.Usage{
   374  		Total: fs.NewUsageValue(int64(info.Capacity)),
   375  		Used:  fs.NewUsageValue(int64(info.Used)),
   376  		Free:  fs.NewUsageValue(int64(info.Remaining)),
   377  	}, nil
   378  }
   379  
   380  func (f *Fs) ensureDirectory(realpath string) error {
   381  	info, err := f.client.Stat(realpath)
   382  
   383  	if e, ok := err.(*os.PathError); ok && e.Err == os.ErrNotExist {
   384  		return fs.ErrorDirNotFound
   385  	}
   386  	if err != nil {
   387  		return err
   388  	}
   389  	if !info.IsDir() {
   390  		return fs.ErrorDirNotFound
   391  	}
   392  
   393  	return nil
   394  }
   395  
   396  func (f *Fs) ensureFile(realpath string) (os.FileInfo, error) {
   397  	info, err := f.client.Stat(realpath)
   398  
   399  	if e, ok := err.(*os.PathError); ok && e.Err == os.ErrNotExist {
   400  		return nil, fs.ErrorObjectNotFound
   401  	}
   402  	if err != nil {
   403  		return nil, err
   404  	}
   405  	if info.IsDir() {
   406  		return nil, fs.ErrorObjectNotFound
   407  	}
   408  
   409  	return info, nil
   410  }
   411  
   412  func (f *Fs) realpath(dir string) string {
   413  	return f.opt.Enc.FromStandardPath(xPath(f.Root(), dir))
   414  }
   415  
   416  // Check the interfaces are satisfied
   417  var (
   418  	_ fs.Fs          = (*Fs)(nil)
   419  	_ fs.Purger      = (*Fs)(nil)
   420  	_ fs.PutStreamer = (*Fs)(nil)
   421  	_ fs.Abouter     = (*Fs)(nil)
   422  	_ fs.Mover       = (*Fs)(nil)
   423  	_ fs.DirMover    = (*Fs)(nil)
   424  )