github.com/databricks/cli@v0.203.0/libs/filer/workspace_files_client.go (about) 1 package filer 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "fmt" 8 "io" 9 "io/fs" 10 "net/http" 11 "net/url" 12 "path" 13 "regexp" 14 "sort" 15 "strings" 16 "time" 17 18 "github.com/databricks/databricks-sdk-go" 19 "github.com/databricks/databricks-sdk-go/apierr" 20 "github.com/databricks/databricks-sdk-go/client" 21 "github.com/databricks/databricks-sdk-go/service/workspace" 22 "golang.org/x/exp/slices" 23 ) 24 25 // Type that implements fs.DirEntry for WSFS. 26 type wsfsDirEntry struct { 27 wsfsFileInfo 28 } 29 30 func (entry wsfsDirEntry) Type() fs.FileMode { 31 return entry.wsfsFileInfo.Mode() 32 } 33 34 func (entry wsfsDirEntry) Info() (fs.FileInfo, error) { 35 return entry.wsfsFileInfo, nil 36 } 37 38 // Type that implements fs.FileInfo for WSFS. 39 type wsfsFileInfo struct { 40 oi workspace.ObjectInfo 41 } 42 43 func (info wsfsFileInfo) Name() string { 44 return path.Base(info.oi.Path) 45 } 46 47 func (info wsfsFileInfo) Size() int64 { 48 return info.oi.Size 49 } 50 51 func (info wsfsFileInfo) Mode() fs.FileMode { 52 switch info.oi.ObjectType { 53 case workspace.ObjectTypeDirectory: 54 return fs.ModeDir 55 default: 56 return fs.ModePerm 57 } 58 } 59 60 func (info wsfsFileInfo) ModTime() time.Time { 61 return time.UnixMilli(info.oi.ModifiedAt) 62 } 63 64 func (info wsfsFileInfo) IsDir() bool { 65 return info.oi.ObjectType == workspace.ObjectTypeDirectory 66 } 67 68 func (info wsfsFileInfo) Sys() any { 69 return info.oi 70 } 71 72 // WorkspaceFilesClient implements the files-in-workspace API. 73 74 // NOTE: This API is available for files under /Repos if a workspace has files-in-repos enabled. 75 // It can access any workspace path if files-in-workspace is enabled. 76 type WorkspaceFilesClient struct { 77 workspaceClient *databricks.WorkspaceClient 78 apiClient *client.DatabricksClient 79 80 // File operations will be relative to this path. 81 root WorkspaceRootPath 82 } 83 84 func NewWorkspaceFilesClient(w *databricks.WorkspaceClient, root string) (Filer, error) { 85 apiClient, err := client.New(w.Config) 86 if err != nil { 87 return nil, err 88 } 89 90 return &WorkspaceFilesClient{ 91 workspaceClient: w, 92 apiClient: apiClient, 93 94 root: NewWorkspaceRootPath(root), 95 }, nil 96 } 97 98 func (w *WorkspaceFilesClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { 99 absPath, err := w.root.Join(name) 100 if err != nil { 101 return err 102 } 103 104 // Remove leading "/" so we can use it in the URL. 105 overwrite := slices.Contains(mode, OverwriteIfExists) 106 urlPath := fmt.Sprintf( 107 "/api/2.0/workspace-files/import-file/%s?overwrite=%t", 108 url.PathEscape(strings.TrimLeft(absPath, "/")), 109 overwrite, 110 ) 111 112 // Buffer the file contents because we may need to retry below and we cannot read twice. 113 body, err := io.ReadAll(reader) 114 if err != nil { 115 return err 116 } 117 118 err = w.apiClient.Do(ctx, http.MethodPost, urlPath, body, nil) 119 120 // Return early on success. 121 if err == nil { 122 return nil 123 } 124 125 // Special handling of this error only if it is an API error. 126 var aerr *apierr.APIError 127 if !errors.As(err, &aerr) { 128 return err 129 } 130 131 // This API returns a 404 if the parent directory does not exist. 132 if aerr.StatusCode == http.StatusNotFound { 133 if !slices.Contains(mode, CreateParentDirectories) { 134 return NoSuchDirectoryError{path.Dir(absPath)} 135 } 136 137 // Create parent directory. 138 err = w.workspaceClient.Workspace.MkdirsByPath(ctx, path.Dir(absPath)) 139 if err != nil { 140 return fmt.Errorf("unable to mkdir to write file %s: %w", absPath, err) 141 } 142 143 // Retry without CreateParentDirectories mode flag. 144 return w.Write(ctx, name, bytes.NewReader(body), sliceWithout(mode, CreateParentDirectories)...) 145 } 146 147 // This API returns 409 if the file already exists, when the object type is file 148 if aerr.StatusCode == http.StatusConflict { 149 return FileAlreadyExistsError{absPath} 150 } 151 152 // This API returns 400 if the file already exists, when the object type is notebook 153 regex := regexp.MustCompile(`Path \((.*)\) already exists.`) 154 if aerr.StatusCode == http.StatusBadRequest && regex.Match([]byte(aerr.Message)) { 155 // Parse file path from regex capture group 156 matches := regex.FindStringSubmatch(aerr.Message) 157 if len(matches) == 2 { 158 return FileAlreadyExistsError{matches[1]} 159 } 160 161 // Default to path specified to filer.Write if regex capture fails 162 return FileAlreadyExistsError{absPath} 163 } 164 165 return err 166 } 167 168 func (w *WorkspaceFilesClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { 169 absPath, err := w.root.Join(name) 170 if err != nil { 171 return nil, err 172 } 173 174 // This stat call serves two purposes: 175 // 1. Checks file at path exists, and throws an error if it does not 176 // 2. Allows us to error out if the path is a directory. This is needed 177 // because the /workspace/export API does not error out, and returns the directory 178 // as a DBC archive even if format "SOURCE" is specified 179 stat, err := w.Stat(ctx, name) 180 if err != nil { 181 return nil, err 182 } 183 if stat.IsDir() { 184 return nil, NotAFile{absPath} 185 } 186 187 // Export file contents. Note the /workspace/export API has a limit of 10MBs 188 // for the file size 189 return w.workspaceClient.Workspace.Download(ctx, absPath) 190 } 191 192 func (w *WorkspaceFilesClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { 193 absPath, err := w.root.Join(name) 194 if err != nil { 195 return err 196 } 197 198 // Illegal to delete the root path. 199 if absPath == w.root.rootPath { 200 return CannotDeleteRootError{} 201 } 202 203 recursive := false 204 if slices.Contains(mode, DeleteRecursively) { 205 recursive = true 206 } 207 208 err = w.workspaceClient.Workspace.Delete(ctx, workspace.Delete{ 209 Path: absPath, 210 Recursive: recursive, 211 }) 212 213 // Return early on success. 214 if err == nil { 215 return nil 216 } 217 218 // Special handling of this error only if it is an API error. 219 var aerr *apierr.APIError 220 if !errors.As(err, &aerr) { 221 return err 222 } 223 224 switch aerr.StatusCode { 225 case http.StatusBadRequest: 226 if aerr.ErrorCode == "DIRECTORY_NOT_EMPTY" { 227 return DirectoryNotEmptyError{absPath} 228 } 229 case http.StatusNotFound: 230 return FileDoesNotExistError{absPath} 231 } 232 233 return err 234 } 235 236 func (w *WorkspaceFilesClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { 237 absPath, err := w.root.Join(name) 238 if err != nil { 239 return nil, err 240 } 241 242 objects, err := w.workspaceClient.Workspace.ListAll(ctx, workspace.ListWorkspaceRequest{ 243 Path: absPath, 244 }) 245 246 if len(objects) == 1 && objects[0].Path == absPath { 247 return nil, NotADirectory{absPath} 248 } 249 250 if err != nil { 251 // If we got an API error we deal with it below. 252 var aerr *apierr.APIError 253 if !errors.As(err, &aerr) { 254 return nil, err 255 } 256 257 // This API returns a 404 if the specified path does not exist. 258 if aerr.StatusCode == http.StatusNotFound { 259 return nil, NoSuchDirectoryError{path.Dir(absPath)} 260 } 261 262 return nil, err 263 } 264 265 info := make([]fs.DirEntry, len(objects)) 266 for i, v := range objects { 267 info[i] = wsfsDirEntry{wsfsFileInfo{oi: v}} 268 } 269 270 // Sort by name for parity with os.ReadDir. 271 sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() }) 272 return info, nil 273 } 274 275 func (w *WorkspaceFilesClient) Mkdir(ctx context.Context, name string) error { 276 dirPath, err := w.root.Join(name) 277 if err != nil { 278 return err 279 } 280 return w.workspaceClient.Workspace.Mkdirs(ctx, workspace.Mkdirs{ 281 Path: dirPath, 282 }) 283 } 284 285 func (w *WorkspaceFilesClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { 286 absPath, err := w.root.Join(name) 287 if err != nil { 288 return nil, err 289 } 290 291 info, err := w.workspaceClient.Workspace.GetStatusByPath(ctx, absPath) 292 if err != nil { 293 // If we got an API error we deal with it below. 294 var aerr *apierr.APIError 295 if !errors.As(err, &aerr) { 296 return nil, err 297 } 298 299 // This API returns a 404 if the specified path does not exist. 300 if aerr.StatusCode == http.StatusNotFound { 301 return nil, FileDoesNotExistError{absPath} 302 } 303 } 304 305 return wsfsFileInfo{*info}, nil 306 }