github.com/databricks/cli@v0.203.0/libs/filer/dbfs_client.go (about) 1 package filer 2 3 import ( 4 "context" 5 "errors" 6 "io" 7 "io/fs" 8 "net/http" 9 "path" 10 "sort" 11 "strings" 12 "time" 13 14 "github.com/databricks/databricks-sdk-go" 15 "github.com/databricks/databricks-sdk-go/apierr" 16 "github.com/databricks/databricks-sdk-go/service/files" 17 "golang.org/x/exp/slices" 18 ) 19 20 // Type that implements fs.DirEntry for DBFS. 21 type dbfsDirEntry struct { 22 dbfsFileInfo 23 } 24 25 func (entry dbfsDirEntry) Type() fs.FileMode { 26 return entry.Mode() 27 } 28 29 func (entry dbfsDirEntry) Info() (fs.FileInfo, error) { 30 return entry.dbfsFileInfo, nil 31 } 32 33 // Type that implements fs.FileInfo for DBFS. 34 type dbfsFileInfo struct { 35 fi files.FileInfo 36 } 37 38 func (info dbfsFileInfo) Name() string { 39 return path.Base(info.fi.Path) 40 } 41 42 func (info dbfsFileInfo) Size() int64 { 43 return info.fi.FileSize 44 } 45 46 func (info dbfsFileInfo) Mode() fs.FileMode { 47 mode := fs.ModePerm 48 if info.fi.IsDir { 49 mode |= fs.ModeDir 50 } 51 return mode 52 } 53 54 func (info dbfsFileInfo) ModTime() time.Time { 55 return time.UnixMilli(info.fi.ModificationTime) 56 } 57 58 func (info dbfsFileInfo) IsDir() bool { 59 return info.fi.IsDir 60 } 61 62 func (info dbfsFileInfo) Sys() any { 63 return info.fi 64 } 65 66 // DbfsClient implements the [Filer] interface for the DBFS backend. 67 type DbfsClient struct { 68 workspaceClient *databricks.WorkspaceClient 69 70 // File operations will be relative to this path. 71 root WorkspaceRootPath 72 } 73 74 func NewDbfsClient(w *databricks.WorkspaceClient, root string) (Filer, error) { 75 return &DbfsClient{ 76 workspaceClient: w, 77 78 root: NewWorkspaceRootPath(root), 79 }, nil 80 } 81 82 func (w *DbfsClient) Write(ctx context.Context, name string, reader io.Reader, mode ...WriteMode) error { 83 absPath, err := w.root.Join(name) 84 if err != nil { 85 return err 86 } 87 88 fileMode := files.FileModeWrite 89 if slices.Contains(mode, OverwriteIfExists) { 90 fileMode |= files.FileModeOverwrite 91 } 92 93 // Issue info call before write because it automatically creates parent directories. 94 // 95 // For discussion: we could decide this is actually convenient, remove the call below, 96 // and apply the same semantics for the WSFS filer. 97 // 98 if !slices.Contains(mode, CreateParentDirectories) { 99 _, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, path.Dir(absPath)) 100 if err != nil { 101 var aerr *apierr.APIError 102 if !errors.As(err, &aerr) { 103 return err 104 } 105 106 // This API returns a 404 if the file doesn't exist. 107 if aerr.StatusCode == http.StatusNotFound { 108 if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { 109 return NoSuchDirectoryError{path.Dir(absPath)} 110 } 111 } 112 113 return err 114 } 115 } 116 117 handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, fileMode) 118 if err != nil { 119 var aerr *apierr.APIError 120 if !errors.As(err, &aerr) { 121 return err 122 } 123 124 // This API returns a 400 if the file already exists. 125 if aerr.StatusCode == http.StatusBadRequest { 126 if aerr.ErrorCode == "RESOURCE_ALREADY_EXISTS" { 127 return FileAlreadyExistsError{absPath} 128 } 129 } 130 131 return err 132 } 133 134 _, err = io.Copy(handle, reader) 135 cerr := handle.Close() 136 if err == nil { 137 err = cerr 138 } 139 140 return err 141 } 142 143 func (w *DbfsClient) Read(ctx context.Context, name string) (io.ReadCloser, error) { 144 absPath, err := w.root.Join(name) 145 if err != nil { 146 return nil, err 147 } 148 149 handle, err := w.workspaceClient.Dbfs.Open(ctx, absPath, files.FileModeRead) 150 if err != nil { 151 // Return error if file is a directory 152 if strings.Contains(err.Error(), "cannot open directory for reading") { 153 return nil, NotAFile{absPath} 154 } 155 156 var aerr *apierr.APIError 157 if !errors.As(err, &aerr) { 158 return nil, err 159 } 160 161 // This API returns a 404 if the file doesn't exist. 162 if aerr.StatusCode == http.StatusNotFound { 163 if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { 164 return nil, FileDoesNotExistError{absPath} 165 } 166 } 167 168 return nil, err 169 } 170 171 // A DBFS handle open for reading does not need to be closed. 172 return io.NopCloser(handle), nil 173 } 174 175 func (w *DbfsClient) Delete(ctx context.Context, name string, mode ...DeleteMode) error { 176 absPath, err := w.root.Join(name) 177 if err != nil { 178 return err 179 } 180 181 // Illegal to delete the root path. 182 if absPath == w.root.rootPath { 183 return CannotDeleteRootError{} 184 } 185 186 // Issue info call before delete because delete succeeds if the specified path doesn't exist. 187 // 188 // For discussion: we could decide this is actually convenient, remove the call below, 189 // and apply the same semantics for the WSFS filer. 190 // 191 _, err = w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath) 192 if err != nil { 193 var aerr *apierr.APIError 194 if !errors.As(err, &aerr) { 195 return err 196 } 197 198 // This API returns a 404 if the file doesn't exist. 199 if aerr.StatusCode == http.StatusNotFound { 200 if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { 201 return FileDoesNotExistError{absPath} 202 } 203 } 204 205 return err 206 } 207 208 recursive := false 209 if slices.Contains(mode, DeleteRecursively) { 210 recursive = true 211 } 212 213 err = w.workspaceClient.Dbfs.Delete(ctx, files.Delete{ 214 Path: absPath, 215 Recursive: recursive, 216 }) 217 218 // Return early on success. 219 if err == nil { 220 return nil 221 } 222 223 // Special handling of this error only if it is an API error. 224 var aerr *apierr.APIError 225 if !errors.As(err, &aerr) { 226 return err 227 } 228 229 switch aerr.StatusCode { 230 case http.StatusBadRequest: 231 // Anecdotally, this error is returned when attempting to delete a non-empty directory. 232 if aerr.ErrorCode == "IO_ERROR" { 233 return DirectoryNotEmptyError{absPath} 234 } 235 } 236 237 return err 238 } 239 240 func (w *DbfsClient) ReadDir(ctx context.Context, name string) ([]fs.DirEntry, error) { 241 absPath, err := w.root.Join(name) 242 if err != nil { 243 return nil, err 244 } 245 246 res, err := w.workspaceClient.Dbfs.ListByPath(ctx, absPath) 247 if err != nil { 248 var aerr *apierr.APIError 249 if !errors.As(err, &aerr) { 250 return nil, err 251 } 252 253 // This API returns a 404 if the file doesn't exist. 254 if aerr.StatusCode == http.StatusNotFound { 255 if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { 256 return nil, NoSuchDirectoryError{absPath} 257 } 258 } 259 260 return nil, err 261 } 262 263 if len(res.Files) == 1 && res.Files[0].Path == absPath { 264 return nil, NotADirectory{absPath} 265 } 266 267 info := make([]fs.DirEntry, len(res.Files)) 268 for i, v := range res.Files { 269 info[i] = dbfsDirEntry{dbfsFileInfo: dbfsFileInfo{fi: v}} 270 } 271 272 // Sort by name for parity with os.ReadDir. 273 sort.Slice(info, func(i, j int) bool { return info[i].Name() < info[j].Name() }) 274 return info, nil 275 } 276 277 func (w *DbfsClient) Mkdir(ctx context.Context, name string) error { 278 dirPath, err := w.root.Join(name) 279 if err != nil { 280 return err 281 } 282 283 return w.workspaceClient.Dbfs.MkdirsByPath(ctx, dirPath) 284 } 285 286 func (w *DbfsClient) Stat(ctx context.Context, name string) (fs.FileInfo, error) { 287 absPath, err := w.root.Join(name) 288 if err != nil { 289 return nil, err 290 } 291 292 info, err := w.workspaceClient.Dbfs.GetStatusByPath(ctx, absPath) 293 if err != nil { 294 var aerr *apierr.APIError 295 if !errors.As(err, &aerr) { 296 return nil, err 297 } 298 299 // This API returns a 404 if the file doesn't exist. 300 if aerr.StatusCode == http.StatusNotFound { 301 if aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" { 302 return nil, FileDoesNotExistError{absPath} 303 } 304 } 305 306 return nil, err 307 } 308 309 return dbfsFileInfo{*info}, nil 310 }