github.com/cs3org/reva/v2@v2.27.7/pkg/storage/utils/decomposedfs/upload/upload.go (about) 1 // Copyright 2018-2022 CERN 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // In applying this license, CERN does not waive the privileges and immunities 16 // granted to it by virtue of its status as an Intergovernmental Organization 17 // or submit itself to any jurisdiction. 18 19 package upload 20 21 import ( 22 "context" 23 "encoding/hex" 24 "fmt" 25 "hash" 26 "io" 27 "io/fs" 28 "net/http" 29 "os" 30 "strconv" 31 "strings" 32 "time" 33 34 userpb "github.com/cs3org/go-cs3apis/cs3/identity/user/v1beta1" 35 provider "github.com/cs3org/go-cs3apis/cs3/storage/provider/v1beta1" 36 "github.com/golang-jwt/jwt/v5" 37 "github.com/pkg/errors" 38 tusd "github.com/tus/tusd/v2/pkg/handler" 39 "go.opentelemetry.io/otel" 40 "go.opentelemetry.io/otel/trace" 41 42 "github.com/cs3org/reva/v2/pkg/appctx" 43 ctxpkg "github.com/cs3org/reva/v2/pkg/ctx" 44 "github.com/cs3org/reva/v2/pkg/errtypes" 45 "github.com/cs3org/reva/v2/pkg/events" 46 "github.com/cs3org/reva/v2/pkg/rhttp/datatx/metrics" 47 "github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/metadata/prefixes" 48 "github.com/cs3org/reva/v2/pkg/storage/utils/decomposedfs/node" 49 "github.com/cs3org/reva/v2/pkg/utils" 50 ) 51 52 var ( 53 tracer trace.Tracer 54 ErrAlreadyExists = tusd.NewError("ERR_ALREADY_EXISTS", "file already exists", http.StatusConflict) 55 defaultFilePerm = os.FileMode(0664) 56 ) 57 58 func init() { 59 tracer = otel.Tracer("github.com/cs3org/reva/pkg/storage/utils/decomposedfs/upload") 60 } 61 62 // WriteChunk writes the stream from the reader to the given offset of the upload 63 func (session *OcisSession) WriteChunk(ctx context.Context, offset int64, src io.Reader) (int64, error) { 64 ctx, span := tracer.Start(session.Context(ctx), "WriteChunk") 65 defer span.End() 66 _, subspan := tracer.Start(ctx, "os.OpenFile") 67 file, err := os.OpenFile(session.binPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm) 68 subspan.End() 69 if err != nil { 70 return 0, err 71 } 72 defer file.Close() 73 74 // calculate cheksum here? needed for the TUS checksum extension. https://tus.io/protocols/resumable-upload.html#checksum 75 // TODO but how do we get the `Upload-Checksum`? WriteChunk() only has a context, offset and the reader ... 76 // It is sent with the PATCH request, well or in the POST when the creation-with-upload extension is used 77 // but the tus handler uses a context.Background() so we cannot really check the header and put it in the context ... 78 _, subspan = tracer.Start(ctx, "io.Copy") 79 n, err := io.Copy(file, src) 80 subspan.End() 81 82 // If the HTTP PATCH request gets interrupted in the middle (e.g. because 83 // the user wants to pause the upload), Go's net/http returns an io.ErrUnexpectedEOF. 84 // However, for the ocis driver it's not important whether the stream has ended 85 // on purpose or accidentally. 86 if err != nil && err != io.ErrUnexpectedEOF { 87 return n, err 88 } 89 90 // update upload.Session.Offset so subsequent code flow can use it. 91 // No need to persist the session as the offset is determined by stating the blob in the GetUpload / ReadSession codepath. 92 // The session offset is written to disk in FinishUpload 93 session.info.Offset += n 94 return n, nil 95 } 96 97 // GetInfo returns the FileInfo 98 func (session *OcisSession) GetInfo(_ context.Context) (tusd.FileInfo, error) { 99 return session.ToFileInfo(), nil 100 } 101 102 // GetReader returns an io.Reader for the upload 103 func (session *OcisSession) GetReader(ctx context.Context) (io.ReadCloser, error) { 104 _, span := tracer.Start(session.Context(ctx), "GetReader") 105 defer span.End() 106 return os.Open(session.binPath()) 107 } 108 109 // FinishUpload finishes an upload and moves the file to the internal destination 110 // implements tusd.DataStore interface 111 // returns tusd errors 112 func (session *OcisSession) FinishUpload(ctx context.Context) error { 113 err := session.FinishUploadDecomposed(ctx) 114 115 // we need to return a tusd error here to make the tusd handler return the correct status code 116 switch err.(type) { 117 case errtypes.AlreadyExists: 118 return tusd.NewError("ERR_ALREADY_EXISTS", err.Error(), http.StatusConflict) 119 case errtypes.Aborted: 120 return tusd.NewError("ERR_PRECONDITION_FAILED", err.Error(), http.StatusPreconditionFailed) 121 default: 122 return err 123 } 124 } 125 126 // FinishUploadDecomposed finishes an upload and moves the file to the internal destination 127 // retures errtypes errors 128 func (session *OcisSession) FinishUploadDecomposed(ctx context.Context) error { 129 ctx, span := tracer.Start(session.Context(ctx), "FinishUpload") 130 defer span.End() 131 log := appctx.GetLogger(ctx) 132 133 ctx = ctxpkg.ContextSetInitiator(ctx, session.InitiatorID()) 134 135 sha1h, md5h, adler32h, err := node.CalculateChecksums(ctx, session.binPath()) 136 if err != nil { 137 return err 138 } 139 140 // compare if they match the sent checksum 141 // TODO the tus checksum extension would do this on every chunk, but I currently don't see an easy way to pass in the requested checksum. for now we do it in FinishUpload which is also called for chunked uploads 142 if session.info.MetaData["checksum"] != "" { 143 var err error 144 parts := strings.SplitN(session.info.MetaData["checksum"], " ", 2) 145 if len(parts) != 2 { 146 return errtypes.BadRequest("invalid checksum format. must be '[algorithm] [checksum]'") 147 } 148 switch parts[0] { 149 case "sha1": 150 err = checkHash(parts[1], sha1h) 151 case "md5": 152 err = checkHash(parts[1], md5h) 153 case "adler32": 154 err = checkHash(parts[1], adler32h) 155 default: 156 err = errtypes.BadRequest("unsupported checksum algorithm: " + parts[0]) 157 } 158 if err != nil { 159 session.store.Cleanup(ctx, session, true, false, false) 160 return err 161 } 162 } 163 164 // update checksums 165 attrs := node.Attributes{ 166 prefixes.ChecksumPrefix + "sha1": sha1h.Sum(nil), 167 prefixes.ChecksumPrefix + "md5": md5h.Sum(nil), 168 prefixes.ChecksumPrefix + "adler32": adler32h.Sum(nil), 169 } 170 171 // At this point we scope by the space to create the final file in the final location 172 if session.store.um != nil && session.info.Storage["SpaceGid"] != "" { 173 gid, err := strconv.Atoi(session.info.Storage["SpaceGid"]) 174 if err != nil { 175 return errors.Wrap(err, "failed to parse space gid") 176 } 177 178 unscope, err := session.store.um.ScopeUserByIds(-1, gid) 179 if err != nil { 180 return errors.Wrap(err, "failed to scope user") 181 } 182 if unscope != nil { 183 defer func() { _ = unscope() }() 184 } 185 } 186 187 n, err := session.store.CreateNodeForUpload(ctx, session, attrs) 188 if err != nil { 189 return err 190 } 191 // increase the processing counter for every started processing 192 // will be decreased in Cleanup() 193 metrics.UploadProcessing.Inc() 194 metrics.UploadSessionsBytesReceived.Inc() 195 196 if session.store.pub != nil && session.info.Size > 0 { 197 u, _ := ctxpkg.ContextGetUser(ctx) 198 s, err := session.URL(ctx) 199 if err != nil { 200 return err 201 } 202 203 var iu *userpb.User 204 if utils.ExistsInOpaque(u.Opaque, "impersonating-user") { 205 iu = &userpb.User{} 206 if err := utils.ReadJSONFromOpaque(u.Opaque, "impersonating-user", iu); err != nil { 207 return err 208 } 209 } 210 211 if err := events.Publish(ctx, session.store.pub, events.BytesReceived{ 212 UploadID: session.ID(), 213 URL: s, 214 SpaceOwner: n.SpaceOwnerOrManager(session.Context(ctx)), 215 ExecutingUser: u, 216 ResourceID: &provider.ResourceId{SpaceId: n.SpaceID, OpaqueId: n.ID}, 217 Filename: session.Filename(), 218 Filesize: uint64(session.Size()), 219 ImpersonatingUser: iu, 220 }); err != nil { 221 return err 222 } 223 } 224 225 // if the upload is synchronous or the upload is empty, finalize it now 226 // for 0-byte uploads we take a shortcut and finalize isn't called elsewhere 227 if !session.store.async || session.info.Size == 0 { 228 // handle postprocessing synchronously 229 err = session.Finalize(ctx) 230 session.store.Cleanup(ctx, session, err != nil, false, err == nil) 231 if err != nil { 232 log.Error().Err(err).Msg("failed to upload") 233 return err 234 } 235 metrics.UploadSessionsFinalized.Inc() 236 } 237 238 return session.store.tp.Propagate(ctx, n, session.SizeDiff()) 239 } 240 241 // Terminate terminates the upload 242 func (session *OcisSession) Terminate(_ context.Context) error { 243 session.Cleanup(true, true, true) 244 return nil 245 } 246 247 // DeclareLength updates the upload length information 248 func (session *OcisSession) DeclareLength(ctx context.Context, length int64) error { 249 session.info.Size = length 250 session.info.SizeIsDeferred = false 251 return session.store.um.RunInBaseScope(func() error { 252 return session.Persist(session.Context(ctx)) 253 }) 254 } 255 256 // ConcatUploads concatenates multiple uploads 257 func (session *OcisSession) ConcatUploads(_ context.Context, uploads []tusd.Upload) (err error) { 258 file, err := os.OpenFile(session.binPath(), os.O_WRONLY|os.O_APPEND, defaultFilePerm) 259 if err != nil { 260 return err 261 } 262 defer file.Close() 263 264 for _, partialUpload := range uploads { 265 fileUpload := partialUpload.(*OcisSession) 266 267 src, err := os.Open(fileUpload.binPath()) 268 if err != nil { 269 return err 270 } 271 defer src.Close() 272 273 if _, err := io.Copy(file, src); err != nil { 274 return err 275 } 276 } 277 278 return 279 } 280 281 // Finalize finalizes the upload (eg moves the file to the internal destination) 282 func (session *OcisSession) Finalize(ctx context.Context) (err error) { 283 ctx, span := tracer.Start(session.Context(ctx), "Finalize") 284 defer span.End() 285 286 revisionNode := node.New(session.SpaceID(), session.NodeID(), "", "", session.Size(), session.ID(), 287 provider.ResourceType_RESOURCE_TYPE_FILE, session.SpaceOwner(), session.store.lu) 288 289 // upload the data to the blobstore 290 _, subspan := tracer.Start(ctx, "WriteBlob") 291 err = session.store.tp.WriteBlob(revisionNode, session.binPath()) 292 subspan.End() 293 if err != nil { 294 return errors.Wrap(err, "failed to upload file to blobstore") 295 } 296 297 return nil 298 } 299 300 func checkHash(expected string, h hash.Hash) error { 301 hash := hex.EncodeToString(h.Sum(nil)) 302 if expected != hash { 303 return errtypes.ChecksumMismatch(fmt.Sprintf("invalid checksum: expected %s got %x", expected, hash)) 304 } 305 return nil 306 } 307 308 func (session *OcisSession) removeNode(ctx context.Context) { 309 n, err := session.Node(ctx) 310 if err != nil { 311 appctx.GetLogger(ctx).Error().Str("session", session.ID()).Err(err).Msg("getting node from session failed") 312 return 313 } 314 if err := n.Purge(ctx); err != nil { 315 appctx.GetLogger(ctx).Error().Str("nodepath", n.InternalPath()).Err(err).Msg("purging node failed") 316 } 317 } 318 319 // cleanup cleans up after the upload is finished 320 func (session *OcisSession) Cleanup(revertNodeMetadata, cleanBin, cleanInfo bool) { 321 ctx := session.Context(context.Background()) 322 323 if revertNodeMetadata { 324 n, err := session.Node(ctx) 325 if err != nil { 326 appctx.GetLogger(ctx).Error().Err(err).Str("sessionid", session.ID()).Msg("reading node for session failed") 327 } else { 328 if session.NodeExists() && session.info.MetaData["versionsPath"] != "" { 329 p := session.info.MetaData["versionsPath"] 330 if err := session.store.lu.CopyMetadata(ctx, p, n.InternalPath(), func(attributeName string, value []byte) (newValue []byte, copy bool) { 331 return value, strings.HasPrefix(attributeName, prefixes.ChecksumPrefix) || 332 attributeName == prefixes.TypeAttr || 333 attributeName == prefixes.BlobIDAttr || 334 attributeName == prefixes.BlobsizeAttr || 335 attributeName == prefixes.MTimeAttr 336 }, true); err != nil { 337 appctx.GetLogger(ctx).Info().Str("versionpath", p).Str("nodepath", n.InternalPath()).Err(err).Msg("renaming version node failed") 338 } 339 340 if err := os.RemoveAll(p); err != nil { 341 appctx.GetLogger(ctx).Info().Str("versionpath", p).Str("nodepath", n.InternalPath()).Err(err).Msg("error removing version") 342 } 343 344 } else { 345 // if no other upload session is in progress (processing id != session id) or has finished (processing id == "") 346 latestSession, err := n.ProcessingID(ctx) 347 if err != nil { 348 appctx.GetLogger(ctx).Error().Err(err).Str("spaceid", n.SpaceID).Str("nodeid", n.ID).Str("uploadid", session.ID()).Msg("reading processingid for session failed") 349 } 350 if latestSession == session.ID() { 351 // actually delete the node 352 session.removeNode(ctx) 353 } 354 // FIXME else if the upload has become a revision, delete the revision, or if it is the last one, delete the node 355 } 356 } 357 } 358 359 if cleanBin { 360 if err := os.Remove(session.binPath()); err != nil && !errors.Is(err, fs.ErrNotExist) { 361 appctx.GetLogger(ctx).Error().Str("path", session.binPath()).Err(err).Msg("removing upload failed") 362 } 363 } 364 365 if cleanInfo { 366 if err := session.Purge(ctx); err != nil && !errors.Is(err, fs.ErrNotExist) { 367 appctx.GetLogger(ctx).Error().Err(err).Str("session", session.ID()).Msg("removing upload info failed") 368 } 369 } 370 } 371 372 // URL returns a url to download an upload 373 func (session *OcisSession) URL(_ context.Context) (string, error) { 374 type transferClaims struct { 375 jwt.RegisteredClaims 376 Target string `json:"target"` 377 } 378 379 u := joinurl(session.store.tknopts.DownloadEndpoint, "tus/", session.ID()) 380 ttl := time.Duration(session.store.tknopts.TransferExpires) * time.Second 381 claims := transferClaims{ 382 RegisteredClaims: jwt.RegisteredClaims{ 383 ExpiresAt: jwt.NewNumericDate(time.Now().Add(ttl)), 384 Audience: jwt.ClaimStrings{"reva"}, 385 IssuedAt: jwt.NewNumericDate(time.Now()), 386 }, 387 Target: u, 388 } 389 390 t := jwt.NewWithClaims(jwt.GetSigningMethod("HS256"), claims) 391 392 tkn, err := t.SignedString([]byte(session.store.tknopts.TransferSharedSecret)) 393 if err != nil { 394 return "", errors.Wrapf(err, "error signing token with claims %+v", claims) 395 } 396 397 return joinurl(session.store.tknopts.DataGatewayEndpoint, tkn), nil 398 } 399 400 // replace with url.JoinPath after switching to go1.19 401 func joinurl(paths ...string) string { 402 var s strings.Builder 403 l := len(paths) 404 for i, p := range paths { 405 s.WriteString(p) 406 if !strings.HasSuffix(p, "/") && i != l-1 { 407 s.WriteString("/") 408 } 409 } 410 411 return s.String() 412 }