github.com/databricks/cli@v0.203.0/libs/filer/dbfs_client.go (about)

     1  package filer
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"io"
     7  	"io/fs"
     8  	"net/http"
     9  	"path"
    10  	"sort"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/databricks/databricks-sdk-go"
    15  	"github.com/databricks/databricks-sdk-go/apierr"
    16  	"github.com/databricks/databricks-sdk-go/service/files"
    17  	"golang.org/x/exp/slices"
    18  )
    19  
    20  // Type that implements fs.DirEntry for DBFS.
    21  type dbfsDirEntry struct {
    22  	dbfsFileInfo
    23  }
    24  
    25  func (entry dbfsDirEntry) Type() fs.FileMode {
    26  	return entry.Mode()
    27  }
    28  
    29  func (entry dbfsDirEntry) Info() (fs.FileInfo, error) {
    30  	return entry.dbfsFileInfo, nil
    31  }
    32  
    33  // Type that implements fs.FileInfo for DBFS.
    34  type dbfsFileInfo struct {
    35  	fi files.FileInfo
    36  }
    37  
    38  func (info dbfsFileInfo) Name() string {
    39  	return path.Base(info.fi.Path)
    40  }
    41  
    42  func (info dbfsFileInfo) Size() int64 {
    43  	return info.fi.FileSize
    44  }
    45  
    46  func (info dbfsFileInfo) Mode() fs.FileMode {
    47  	mode := fs.ModePerm
    48  	if info.fi.IsDir {
    49  		mode |= fs.ModeDir
    50  	}
    51  	return mode
    52  }
    53  
    54  func (info dbfsFileInfo) ModTime() time.Time {
    55  	return time.UnixMilli(info.fi.ModificationTime)
    56  }
    57  
    58  func (info dbfsFileInfo) IsDir() bool {
    59  	return info.fi.IsDir
    60  }
    61  
    62  func (info dbfsFileInfo) Sys() any {
    63  	return info.fi
    64  }
    65  
    66  // DbfsClient implements the [Filer] interface for the DBFS backend.
    67  type DbfsClient struct {
    68  	workspaceClient *databricks.WorkspaceClient
    69  
    70  	// File operations will be relative to this path.
    71  	root WorkspaceRootPath
    72  }
    73  
    74  func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) {
    75  	return &DbfsClient{
    76  		workspaceClient: w,
    77  
    78  		root: NewWorkspaceRootPath(root),
    79  	}, nil
    80  }
    81  
    82  func (w *DbfsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error {
    83  	absPath, err := w.root.Join(name)
    84  	if err != nil {
    85  		return err
    86  	}
    87  
    88  	fileMode := files.FileModeWrite
    89  	if slices.Contains(mode, OverwriteIfExists) {
    90  		fileMode |= files.FileModeOverwrite
    91  	}
    92  
    93  	// Issue info call before write because it automatically creates parent directories.
    94  	//
    95  	// For discussion: we could decide this is actually convenient, remove the call below,
    96  	// and apply the same semantics for the WSFS filer.
    97  	//
    98  	if !slices.Contains(mode, CreateParentDirectories) {
    99  		_, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, path.Dir(absPath))
   100  		if err != nil {
   101  			var aerr *apierr.APIError
   102  			if !errors.As(err, &aerr) {
   103  				return err
   104  			}
   105  
   106  			// This API returns a 404 if the file doesn't exist.
   107  			if aerr.StatusCode == http.StatusNotFound {
   108  				if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
   109  					return NoSuchDirectoryError{path.Dir(absPath)}
   110  				}
   111  			}
   112  
   113  			return err
   114  		}
   115  	}
   116  
   117  	handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, fileMode)
   118  	if err != nil {
   119  		var aerr *apierr.APIError
   120  		if !errors.As(err, &aerr) {
   121  			return err
   122  		}
   123  
   124  		// This API returns a 400 if the file already exists.
   125  		if aerr.StatusCode == http.StatusBadRequest {
   126  			if aerr.ErrorCode == "RESOURCE_ALREADY_EXISTS" {
   127  				return FileAlreadyExistsError{absPath}
   128  			}
   129  		}
   130  
   131  		return err
   132  	}
   133  
   134  	_, err = io.Copy(handle, reader)
   135  	cerr := handle.Close()
   136  	if err == nil {
   137  		err = cerr
   138  	}
   139  
   140  	return err
   141  }
   142  
   143  func (w *DbfsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) {
   144  	absPath, err := w.root.Join(name)
   145  	if err != nil {
   146  		return nil, err
   147  	}
   148  
   149  	handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead)
   150  	if err != nil {
   151  		// Return error if file is a directory
   152  		if strings.Contains(err.Error(), "cannot open directory for reading") {
   153  			return nil, NotAFile{absPath}
   154  		}
   155  
   156  		var aerr *apierr.APIError
   157  		if !errors.As(err, &aerr) {
   158  			return nil, err
   159  		}
   160  
   161  		// This API returns a 404 if the file doesn't exist.
   162  		if aerr.StatusCode == http.StatusNotFound {
   163  			if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
   164  				return nil, FileDoesNotExistError{absPath}
   165  			}
   166  		}
   167  
   168  		return nil, err
   169  	}
   170  
   171  	// A DBFS handle open for reading does not need to be closed.
   172  	return io.NopCloser(handle), nil
   173  }
   174  
   175  func (w *DbfsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error {
   176  	absPath, err := w.root.Join(name)
   177  	if err != nil {
   178  		return err
   179  	}
   180  
   181  	// Illegal to delete the root path.
   182  	if absPath == w.root.rootPath {
   183  		return CannotDeleteRootError{}
   184  	}
   185  
   186  	// Issue info call before delete because delete succeeds if the specified path doesn't exist.
   187  	//
   188  	// For discussion: we could decide this is actually convenient, remove the call below,
   189  	// and apply the same semantics for the WSFS filer.
   190  	//
   191  	_, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath)
   192  	if err != nil {
   193  		var aerr *apierr.APIError
   194  		if !errors.As(err, &aerr) {
   195  			return err
   196  		}
   197  
   198  		// This API returns a 404 if the file doesn't exist.
   199  		if aerr.StatusCode == http.StatusNotFound {
   200  			if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
   201  				return FileDoesNotExistError{absPath}
   202  			}
   203  		}
   204  
   205  		return err
   206  	}
   207  
   208  	recursive := false
   209  	if slices.Contains(mode, DeleteRecursively) {
   210  		recursive = true
   211  	}
   212  
   213  	err = w.workspaceClient.Dbfs.Delete(ctx, files.Delete{
   214  		Path:      absPath,
   215  		Recursive: recursive,
   216  	})
   217  
   218  	// Return early on success.
   219  	if err == nil {
   220  		return nil
   221  	}
   222  
   223  	// Special handling of this error only if it is an API error.
   224  	var aerr *apierr.APIError
   225  	if !errors.As(err, &aerr) {
   226  		return err
   227  	}
   228  
   229  	switch aerr.StatusCode {
   230  	case http.StatusBadRequest:
   231  		// Anecdotally, this error is returned when attempting to delete a non-empty directory.
   232  		if aerr.ErrorCode == "IO_ERROR" {
   233  			return DirectoryNotEmptyError{absPath}
   234  		}
   235  	}
   236  
   237  	return err
   238  }
   239  
   240  func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) {
   241  	absPath, err := w.root.Join(name)
   242  	if err != nil {
   243  		return nil, err
   244  	}
   245  
   246  	res, err := w.workspaceClient.Dbfs.ListByPath(ctx, absPath)
   247  	if err != nil {
   248  		var aerr *apierr.APIError
   249  		if !errors.As(err, &aerr) {
   250  			return nil, err
   251  		}
   252  
   253  		// This API returns a 404 if the file doesn't exist.
   254  		if aerr.StatusCode == http.StatusNotFound {
   255  			if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
   256  				return nil, NoSuchDirectoryError{absPath}
   257  			}
   258  		}
   259  
   260  		return nil, err
   261  	}
   262  
   263  	if len(res.Files) == 1 && res.Files[0].Path == absPath {
   264  		return nil, NotADirectory{absPath}
   265  	}
   266  
   267  	info := make([]fs.DirEntry, len(res.Files))
   268  	for i, v := range res.Files {
   269  		info[i] = dbfsDirEntry{dbfsFileInfo: dbfsFileInfo{fi: v}}
   270  	}
   271  
   272  	// Sort by name for parity with os.ReadDir.
   273  	sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() })
   274  	return info, nil
   275  }
   276  
   277  func (w *DbfsClient) Mkdir(ctx context.Context, name string) error {
   278  	dirPath, err := w.root.Join(name)
   279  	if err != nil {
   280  		return err
   281  	}
   282  
   283  	return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath)
   284  }
   285  
   286  func (w *DbfsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
   287  	absPath, err := w.root.Join(name)
   288  	if err != nil {
   289  		return nil, err
   290  	}
   291  
   292  	info, err := w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath)
   293  	if err != nil {
   294  		var aerr *apierr.APIError
   295  		if !errors.As(err, &aerr) {
   296  			return nil, err
   297  		}
   298  
   299  		// This API returns a 404 if the file doesn't exist.
   300  		if aerr.StatusCode == http.StatusNotFound {
   301  			if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
   302  				return nil, FileDoesNotExistError{absPath}
   303  			}
   304  		}
   305  
   306  		return nil, err
   307  	}
   308  
   309  	return dbfsFileInfo{*info}, nil
   310  }