github.com/treeverse/lakefs@v1.24.1-0.20240520134607-95648127bfb0/pkg/gateway/operations/deleteobjects.go (about) 1 package operations 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "net/http" 8 9 "github.com/treeverse/lakefs/pkg/auth" 10 "github.com/treeverse/lakefs/pkg/catalog" 11 gerrors "github.com/treeverse/lakefs/pkg/gateway/errors" 12 "github.com/treeverse/lakefs/pkg/gateway/path" 13 "github.com/treeverse/lakefs/pkg/gateway/serde" 14 "github.com/treeverse/lakefs/pkg/graveler" 15 "github.com/treeverse/lakefs/pkg/logging" 16 "github.com/treeverse/lakefs/pkg/permissions" 17 ) 18 19 // maxDeleteObjects maximum number of objects we can delete in one call. 20 // base on https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html 21 const maxDeleteObjects = 1000 22 23 type DeleteObjects struct{} 24 25 func (controller *DeleteObjects) RequiredPermissions(_ *http.Request, _ string) (permissions.Node, error) { 26 return permissions.Node{}, nil 27 } 28 29 func (controller *DeleteObjects) Handle(w http.ResponseWriter, req *http.Request, o *RepoOperation) { 30 // verify we only handle delete request 31 query := req.URL.Query() 32 if !query.Has("delete") { 33 _ = o.EncodeError(w, req, nil, gerrors.ERRLakeFSNotSupported.ToAPIErr()) 34 return 35 } 36 37 o.Incr("delete_objects", o.Principal, o.Repository.Name, "") 38 decodedXML := &serde.Delete{} 39 err := DecodeXMLBody(req.Body, decodedXML) 40 if err != nil { 41 _ = o.EncodeError(w, req, err, gerrors.Codes.ToAPIErr(gerrors.ErrBadRequest)) 42 return 43 } 44 if len(decodedXML.Object) == 0 || len(decodedXML.Object) > maxDeleteObjects { 45 _ = o.EncodeError(w, req, err, gerrors.Codes.ToAPIErr(gerrors.ErrMalformedXML)) 46 return 47 } 48 49 // delete all the files and collect responses 50 // arrays of keys/path to delete, left after authorization check 51 var ( 52 keysToDelete []string 53 pathsToDelete []string 54 refsToDelete []string 55 errs []serde.DeleteError 56 ) 57 for _, obj := range decodedXML.Object { 58 resolvedPath, err := path.ResolvePath(obj.Key) 59 if err != nil { 60 errs = append(errs, serde.DeleteError{ 61 Code: "ErrDeletingKey", 62 Key: obj.Key, 63 Message: fmt.Sprintf("error deleting object: %s", err), 64 }) 65 continue 66 } 67 // authorize this object deletion 68 authResp, err := o.Auth.Authorize(req.Context(), &auth.AuthorizationRequest{ 69 Username: o.Principal, 70 RequiredPermissions: permissions.Node{ 71 Permission: permissions.Permission{ 72 Action: permissions.DeleteObjectAction, 73 Resource: permissions.ObjectArn(o.Repository.Name, resolvedPath.Path), 74 }, 75 }, 76 }) 77 if err != nil || !authResp.Allowed { 78 errs = append(errs, serde.DeleteError{ 79 Code: "AccessDenied", 80 Key: obj.Key, 81 Message: "Access Denied", 82 }) 83 continue 84 } 85 86 keysToDelete = append(keysToDelete, obj.Key) 87 refsToDelete = append(refsToDelete, resolvedPath.Ref) 88 pathsToDelete = append(pathsToDelete, resolvedPath.Path) 89 } 90 if len(pathsToDelete) == 0 { 91 // construct response - probably we failed with all errors 92 resp := serde.DeleteResult{Error: errs} 93 o.EncodeResponse(w, req, resp, http.StatusOK) 94 return 95 } 96 97 // batch delete - if all paths to delete are same ref 98 canBatch := true 99 for i := 1; i < len(refsToDelete); i++ { 100 if refsToDelete[0] != refsToDelete[i] { 101 canBatch = false 102 break 103 } 104 } 105 106 var resp serde.DeleteResult 107 if canBatch { 108 // batch - call batch delete for all keys on ref 109 resp = controller.batchDelete(req.Context(), o.Log(req), o, decodedXML.Quiet, refsToDelete[0], keysToDelete, pathsToDelete) 110 } else { 111 // non batch - call delete for each key 112 resp = controller.nonBatchDelete(req.Context(), o.Log(req), o, decodedXML.Quiet, keysToDelete, refsToDelete, pathsToDelete) 113 } 114 115 // construct response - concat what we had so far with delete results 116 if len(errs) > 0 { 117 resp.Error = append(errs, resp.Error...) 118 } 119 o.EncodeResponse(w, req, resp, http.StatusOK) 120 } 121 122 func (controller *DeleteObjects) nonBatchDelete(ctx context.Context, log logging.Logger, o *RepoOperation, quiet bool, keysToDelete []string, refsToDelete []string, pathsToDelete []string) serde.DeleteResult { 123 var result serde.DeleteResult 124 for i, key := range keysToDelete { 125 err := o.Catalog.DeleteEntry(ctx, o.Repository.Name, refsToDelete[i], pathsToDelete[i]) 126 updateDeleteResult(&result, quiet, log, key, err) 127 } 128 return result 129 } 130 131 func (controller *DeleteObjects) batchDelete(ctx context.Context, log logging.Logger, o *RepoOperation, quiet bool, ref string, keysToDelete []string, pathsToDelete []string) serde.DeleteResult { 132 var result serde.DeleteResult 133 batchErr := o.Catalog.DeleteEntries(ctx, o.Repository.Name, ref, pathsToDelete) 134 deleteErrs := graveler.NewMapDeleteErrors(batchErr) 135 for _, key := range keysToDelete { 136 // err will set to the specific error if possible, fallback to the batch delete error 137 err := deleteErrs[key] 138 if err == nil { 139 err = batchErr 140 } 141 updateDeleteResult(&result, quiet, log, key, err) 142 } 143 return result 144 } 145 146 // updateDeleteResult check the error and update the 'result' with error or delete response for 'key' 147 func updateDeleteResult(result *serde.DeleteResult, quiet bool, log logging.Logger, key string, err error) { 148 deleteError := checkForDeleteError(log, key, err) 149 if deleteError != nil { 150 result.Error = append(result.Error, *deleteError) 151 } else if !quiet { 152 result.Deleted = append(result.Deleted, serde.Deleted{Key: key}) 153 } 154 } 155 156 func checkForDeleteError(log logging.Logger, key string, err error) *serde.DeleteError { 157 switch { 158 case errors.Is(err, graveler.ErrNotFound): 159 log.Debug("tried to delete a non-existent object (OK)") 160 case errors.Is(err, graveler.ErrWriteToProtectedBranch): 161 apiErr := gerrors.Codes.ToAPIErr(gerrors.ErrWriteToProtectedBranch) 162 return &serde.DeleteError{ 163 Code: apiErr.Code, 164 Key: key, 165 Message: fmt.Sprintf("error deleting object: %s", apiErr.Description), 166 } 167 case errors.Is(err, graveler.ErrReadOnlyRepository): 168 apiErr := gerrors.Codes.ToAPIErr(gerrors.ErrReadOnlyRepository) 169 return &serde.DeleteError{ 170 Code: apiErr.Code, 171 Key: key, 172 Message: fmt.Sprintf("error deleting object: %s", apiErr.Description), 173 } 174 case errors.Is(err, catalog.ErrPathRequiredValue): 175 // issue #1706 - https://github.com/treeverse/lakeFS/issues/1706 176 // Spark trying to delete the path "main/", which we map to branch "main" with an empty path. 177 // Spark expects it to succeed (not deleting anything is a success), instead of returning an error. 178 log.Debug("tried to delete with an empty path") 179 case err != nil: 180 log.WithField("key", key).WithError(err).Error("failed deleting object") 181 return &serde.DeleteError{ 182 Code: "ErrDeletingKey", 183 Key: key, 184 Message: fmt.Sprintf("error deleting object: %s", err), 185 } 186 default: 187 log.WithField("key", key).Debug("object set for deletion") 188 } 189 return nil 190 }