github.com/databricks/cli@v0.203.0/libs/filer/workspace_files_client.go (about)

     1  package filer
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"io/fs"
    10  	"net/http"
    11  	"net/url"
    12  	"path"
    13  	"regexp"
    14  	"sort"
    15  	"strings"
    16  	"time"
    17  
    18  	"github.com/databricks/databricks-sdk-go"
    19  	"github.com/databricks/databricks-sdk-go/apierr"
    20  	"github.com/databricks/databricks-sdk-go/client"
    21  	"github.com/databricks/databricks-sdk-go/service/workspace"
    22  	"golang.org/x/exp/slices"
    23  )
    24  
    25  // Type that implements fs.DirEntry for WSFS.
    26  type wsfsDirEntry struct {
    27  	wsfsFileInfo
    28  }
    29  
    30  func (entry wsfsDirEntry) Type() fs.FileMode {
    31  	return entry.wsfsFileInfo.Mode()
    32  }
    33  
    34  func (entry wsfsDirEntry) Info() (fs.FileInfo, error) {
    35  	return entry.wsfsFileInfo, nil
    36  }
    37  
    38  // Type that implements fs.FileInfo for WSFS.
    39  type wsfsFileInfo struct {
    40  	oi workspace.ObjectInfo
    41  }
    42  
    43  func (info wsfsFileInfo) Name() string {
    44  	return path.Base(info.oi.Path)
    45  }
    46  
    47  func (info wsfsFileInfo) Size() int64 {
    48  	return info.oi.Size
    49  }
    50  
    51  func (info wsfsFileInfo) Mode() fs.FileMode {
    52  	switch info.oi.ObjectType {
    53  	case workspace.ObjectTypeDirectory:
    54  		return fs.ModeDir
    55  	default:
    56  		return fs.ModePerm
    57  	}
    58  }
    59  
    60  func (info wsfsFileInfo) ModTime() time.Time {
    61  	return time.UnixMilli(info.oi.ModifiedAt)
    62  }
    63  
    64  func (info wsfsFileInfo) IsDir() bool {
    65  	return info.oi.ObjectType == workspace.ObjectTypeDirectory
    66  }
    67  
    68  func (info wsfsFileInfo) Sys() any {
    69  	return info.oi
    70  }
    71  
    72  // WorkspaceFilesClient implements the files-in-workspace API.
    73  
    74  // NOTE: This API is available for files under /Repos if a workspace has files-in-repos enabled.
    75  // It can access any workspace path if files-in-workspace is enabled.
    76  type WorkspaceFilesClient struct {
    77  	workspaceClient *databricks.WorkspaceClient
    78  	apiClient       *client.DatabricksClient
    79  
    80  	// File operations will be relative to this path.
    81  	root WorkspaceRootPath
    82  }
    83  
    84  func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) {
    85  	apiClient, err := client.New(w.Config)
    86  	if err != nil {
    87  		return nil, err
    88  	}
    89  
    90  	return &WorkspaceFilesClient{
    91  		workspaceClient: w,
    92  		apiClient:       apiClient,
    93  
    94  		root: NewWorkspaceRootPath(root),
    95  	}, nil
    96  }
    97  
    98  func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error {
    99  	absPath, err := w.root.Join(name)
   100  	if err != nil {
   101  		return err
   102  	}
   103  
   104  	// Remove leading "/" so we can use it in the URL.
   105  	overwrite := slices.Contains(mode, OverwriteIfExists)
   106  	urlPath := fmt.Sprintf(
   107  		"/api/2.0/workspace-files/import-file/%s?overwrite=%t",
   108  		url.PathEscape(strings.TrimLeft(absPath, "/")),
   109  		overwrite,
   110  	)
   111  
   112  	// Buffer the file contents because we may need to retry below and we cannot read twice.
   113  	body, err := io.ReadAll(reader)
   114  	if err != nil {
   115  		return err
   116  	}
   117  
   118  	err = w.apiClient.Do(ctx, http.MethodPost, urlPath, body, nil)
   119  
   120  	// Return early on success.
   121  	if err == nil {
   122  		return nil
   123  	}
   124  
   125  	// Special handling of this error only if it is an API error.
   126  	var aerr *apierr.APIError
   127  	if !errors.As(err, &aerr) {
   128  		return err
   129  	}
   130  
   131  	// This API returns a 404 if the parent directory does not exist.
   132  	if aerr.StatusCode == http.StatusNotFound {
   133  		if !slices.Contains(mode, CreateParentDirectories) {
   134  			return NoSuchDirectoryError{path.Dir(absPath)}
   135  		}
   136  
   137  		// Create parent directory.
   138  		err = w.workspaceClient.Workspace.MkdirsByPath(ctx, path.Dir(absPath))
   139  		if err != nil {
   140  			return fmt.Errorf("unable to mkdir to write file %s: %w", absPath, err)
   141  		}
   142  
   143  		// Retry without CreateParentDirectories mode flag.
   144  		return w.Write(ctx, name, bytes.NewReader(body), sliceWithout(mode, CreateParentDirectories)...)
   145  	}
   146  
   147  	// This API returns 409 if the file already exists, when the object type is file
   148  	if aerr.StatusCode == http.StatusConflict {
   149  		return FileAlreadyExistsError{absPath}
   150  	}
   151  
   152  	// This API returns 400 if the file already exists, when the object type is notebook
   153  	regex := regexp.MustCompile(`Path \((.*)\) already exists.`)
   154  	if aerr.StatusCode == http.StatusBadRequest && regex.Match([]byte(aerr.Message)) {
   155  		// Parse file path from regex capture group
   156  		matches := regex.FindStringSubmatch(aerr.Message)
   157  		if len(matches) == 2 {
   158  			return FileAlreadyExistsError{matches[1]}
   159  		}
   160  
   161  		// Default to path specified to filer.Write if regex capture fails
   162  		return FileAlreadyExistsError{absPath}
   163  	}
   164  
   165  	return err
   166  }
   167  
   168  func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCloser, error) {
   169  	absPath, err := w.root.Join(name)
   170  	if err != nil {
   171  		return nil, err
   172  	}
   173  
   174  	// This stat call serves two purposes:
   175  	// 1. Checks file at path exists, and throws an error if it does not
   176  	// 2. Allows us to error out if the path is a directory. This is needed
   177  	// because the /workspace/export API does not error out, and returns the directory
   178  	// as a DBC archive even if format "SOURCE" is specified
   179  	stat, err := w.Stat(ctx, name)
   180  	if err != nil {
   181  		return nil, err
   182  	}
   183  	if stat.IsDir() {
   184  		return nil, NotAFile{absPath}
   185  	}
   186  
   187  	// Export file contents. Note the /workspace/export API has a limit of 10MBs
   188  	// for the file size
   189  	return w.workspaceClient.Workspace.Download(ctx, absPath)
   190  }
   191  
   192  func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error {
   193  	absPath, err := w.root.Join(name)
   194  	if err != nil {
   195  		return err
   196  	}
   197  
   198  	// Illegal to delete the root path.
   199  	if absPath == w.root.rootPath {
   200  		return CannotDeleteRootError{}
   201  	}
   202  
   203  	recursive := false
   204  	if slices.Contains(mode, DeleteRecursively) {
   205  		recursive = true
   206  	}
   207  
   208  	err = w.workspaceClient.Workspace.Delete(ctx, workspace.Delete{
   209  		Path:      absPath,
   210  		Recursive: recursive,
   211  	})
   212  
   213  	// Return early on success.
   214  	if err == nil {
   215  		return nil
   216  	}
   217  
   218  	// Special handling of this error only if it is an API error.
   219  	var aerr *apierr.APIError
   220  	if !errors.As(err, &aerr) {
   221  		return err
   222  	}
   223  
   224  	switch aerr.StatusCode {
   225  	case http.StatusBadRequest:
   226  		if aerr.ErrorCode == "DIRECTORY_NOT_EMPTY" {
   227  			return DirectoryNotEmptyError{absPath}
   228  		}
   229  	case http.StatusNotFound:
   230  		return FileDoesNotExistError{absPath}
   231  	}
   232  
   233  	return err
   234  }
   235  
   236  func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) {
   237  	absPath, err := w.root.Join(name)
   238  	if err != nil {
   239  		return nil, err
   240  	}
   241  
   242  	objects, err := w.workspaceClient.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{
   243  		Path: absPath,
   244  	})
   245  
   246  	if len(objects) == 1 && objects[0].Path == absPath {
   247  		return nil, NotADirectory{absPath}
   248  	}
   249  
   250  	if err != nil {
   251  		// If we got an API error we deal with it below.
   252  		var aerr *apierr.APIError
   253  		if !errors.As(err, &aerr) {
   254  			return nil, err
   255  		}
   256  
   257  		// This API returns a 404 if the specified path does not exist.
   258  		if aerr.StatusCode == http.StatusNotFound {
   259  			return nil, NoSuchDirectoryError{path.Dir(absPath)}
   260  		}
   261  
   262  		return nil, err
   263  	}
   264  
   265  	info := make([]fs.DirEntry, len(objects))
   266  	for i, v := range objects {
   267  		info[i] = wsfsDirEntry{wsfsFileInfo{oi: v}}
   268  	}
   269  
   270  	// Sort by name for parity with os.ReadDir.
   271  	sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() })
   272  	return info, nil
   273  }
   274  
   275  func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error {
   276  	dirPath, err := w.root.Join(name)
   277  	if err != nil {
   278  		return err
   279  	}
   280  	return w.workspaceClient.Workspace.Mkdirs(ctx, workspace.Mkdirs{
   281  		Path: dirPath,
   282  	})
   283  }
   284  
   285  func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) {
   286  	absPath, err := w.root.Join(name)
   287  	if err != nil {
   288  		return nil, err
   289  	}
   290  
   291  	info, err := w.workspaceClient.Workspace.GetStatusByPath(ctx, absPath)
   292  	if err != nil {
   293  		// If we got an API error we deal with it below.
   294  		var aerr *apierr.APIError
   295  		if !errors.As(err, &aerr) {
   296  			return nil, err
   297  		}
   298  
   299  		// This API returns a 404 if the specified path does not exist.
   300  		if aerr.StatusCode == http.StatusNotFound {
   301  			return nil, FileDoesNotExistError{absPath}
   302  		}
   303  	}
   304  
   305  	return wsfsFileInfo{*info}, nil
   306  }