// chunkedNameRegexp matches names that end in
// "-chunking-<transferID>-<totalChunks>-<currentChunk>". It is compiled once
// at package initialisation instead of on every IsChunked call.
var chunkedNameRegexp = regexp.MustCompile(`-chunking-\w+-[0-9]+-[0-9]+$`)

// IsChunked checks if a given path refers to a chunk or not.
//
// The returned error is always nil; it is kept in the signature so existing
// callers that check it keep compiling.
func IsChunked(fn string) (bool, error) {
	// FIXME: also need to check whether the OC-Chunked header is set
	return chunkedNameRegexp.MatchString(fn), nil
}

// ChunkBLOBInfo stores info about a particular chunk.
type ChunkBLOBInfo struct {
	// Path is the part of the chunk name that precedes the "-chunking-" marker.
	Path string
	// TransferID is the identifier shared by all chunks of one upload.
	TransferID string
	// TotalChunks is the number of chunks the upload consists of.
	TotalChunks int
	// CurrentChunk is the index of this chunk.
	CurrentChunk int
}

// uploadID returns the name of the folder that collects the chunks of one
// upload.
//
// Not using the resource path in the chunk folder name allows uploading to
// the same folder after a move without having to restart the chunk upload.
func (c *ChunkBLOBInfo) uploadID() string {
	return fmt.Sprintf("chunking-%s-%d", c.TransferID, c.TotalChunks)
}
60 func GetChunkBLOBInfo(path string) (*ChunkBLOBInfo, error) { 61 parts := strings.Split(path, "-chunking-") 62 tail := strings.Split(parts[1], "-") 63 64 totalChunks, err := strconv.Atoi(tail[1]) 65 if err != nil { 66 return nil, err 67 } 68 69 currentChunk, err := strconv.Atoi(tail[2]) 70 if err != nil { 71 return nil, err 72 } 73 if currentChunk >= totalChunks { 74 return nil, fmt.Errorf("current chunk:%d exceeds total number of chunks:%d", currentChunk, totalChunks) 75 } 76 77 return &ChunkBLOBInfo{ 78 Path: parts[0], 79 TransferID: tail[0], 80 TotalChunks: totalChunks, 81 CurrentChunk: currentChunk, 82 }, nil 83 } 84 85 // ChunkHandler manages chunked uploads, storing the chunks in a temporary directory 86 // until it gets the final chunk which is then returned. 87 type ChunkHandler struct { 88 user *User 89 chunkFolder string 90 } 91 92 // NewChunkHandler creates a handler for chunked uploads. 93 func NewChunkHandler(ctx context.Context, fs *cephfs) *ChunkHandler { 94 return &ChunkHandler{fs.makeUser(ctx), fs.conf.UploadFolder} 95 } 96 97 func (c *ChunkHandler) getChunkTempFileName() string { 98 return fmt.Sprintf("__%d_%s", time.Now().Unix(), uuid.New().String()) 99 } 100 101 func (c *ChunkHandler) getChunkFolderName(i *ChunkBLOBInfo) (path string, err error) { 102 path = filepath.Join(c.chunkFolder, i.uploadID()) 103 c.user.op(func(cv *cacheVal) { 104 err = cv.mount.MakeDir(path, 0777) 105 }) 106 107 return 108 } 109 110 func (c *ChunkHandler) saveChunk(path string, r io.ReadCloser) (finish bool, chunk string, err error) { 111 var chunkInfo *ChunkBLOBInfo 112 113 chunkInfo, err = GetChunkBLOBInfo(path) 114 if err != nil { 115 err = fmt.Errorf("error getting chunk info from path: %s", path) 116 return 117 } 118 119 chunkTempFilename := c.getChunkTempFileName() 120 c.user.op(func(cv *cacheVal) { 121 var tmpFile *cephfs2.File 122 target := filepath.Join(c.chunkFolder, chunkTempFilename) 123 tmpFile, err = cv.mount.Open(target, os.O_CREATE|os.O_WRONLY, 
filePermDefault) 124 defer closeFile(tmpFile) 125 if err != nil { 126 return 127 } 128 _, err = io.Copy(tmpFile, r) 129 }) 130 if err != nil { 131 return 132 } 133 134 chunksFolderName, err := c.getChunkFolderName(chunkInfo) 135 if err != nil { 136 return 137 } 138 // c.logger.Info().Log("chunkfolder", chunksFolderName) 139 140 chunkTarget := filepath.Join(chunksFolderName, strconv.Itoa(chunkInfo.CurrentChunk)) 141 c.user.op(func(cv *cacheVal) { 142 err = cv.mount.Rename(chunkTempFilename, chunkTarget) 143 }) 144 if err != nil { 145 return 146 } 147 148 // Check that all chunks are uploaded. 149 // This is very inefficient, the server has to check that it has all the 150 // chunks after each uploaded chunk. 151 // A two-phase upload like DropBox is better, because the server will 152 // assembly the chunks when the client asks for it. 153 numEntries := 0 154 c.user.op(func(cv *cacheVal) { 155 var dir *cephfs2.Directory 156 var entry *cephfs2.DirEntry 157 var chunkFile, assembledFile *cephfs2.File 158 159 dir, err = cv.mount.OpenDir(chunksFolderName) 160 defer closeDir(dir) 161 162 for entry, err = dir.ReadDir(); entry != nil && err == nil; entry, err = dir.ReadDir() { 163 numEntries++ 164 } 165 // to remove . and .. 
166 numEntries -= 2 167 168 if err != nil || numEntries < chunkInfo.TotalChunks { 169 return 170 } 171 172 chunk = filepath.Join(c.chunkFolder, c.getChunkTempFileName()) 173 assembledFile, err = cv.mount.Open(chunk, os.O_CREATE|os.O_WRONLY, filePermDefault) 174 defer closeFile(assembledFile) 175 defer deleteFile(cv.mount, chunk) 176 if err != nil { 177 return 178 } 179 180 for i := 0; i < numEntries; i++ { 181 target := filepath.Join(chunksFolderName, strconv.Itoa(i)) 182 183 chunkFile, err = cv.mount.Open(target, os.O_RDONLY, 0) 184 if err != nil { 185 return 186 } 187 _, err = io.Copy(assembledFile, chunkFile) 188 closeFile(chunkFile) 189 if err != nil { 190 return 191 } 192 } 193 194 // necessary approach in case assembly fails 195 for i := 0; i < numEntries; i++ { 196 target := filepath.Join(chunksFolderName, strconv.Itoa(i)) 197 err = cv.mount.Unlink(target) 198 if err != nil { 199 return 200 } 201 } 202 _ = cv.mount.Unlink(chunksFolderName) 203 }) 204 205 return true, chunk, nil 206 } 207 208 // WriteChunk saves an intermediate chunk temporarily and assembles all chunks 209 // once the final one is received. 
210 func (c *ChunkHandler) WriteChunk(fn string, r io.ReadCloser) (string, string, error) { 211 finish, chunk, err := c.saveChunk(fn, r) 212 if err != nil { 213 return "", "", err 214 } 215 216 if !finish { 217 return "", "", nil 218 } 219 220 chunkInfo, err := GetChunkBLOBInfo(fn) 221 if err != nil { 222 return "", "", err 223 } 224 225 return chunkInfo.Path, chunk, nil 226 227 // TODO(labkode): implement old chunking 228 229 /* 230 req2 := &provider.StartWriteSessionRequest{} 231 res2, err := client.StartWriteSession(ctx, req2) 232 if err != nil { 233 logger.Error(ctx, err) 234 w.WriteHeader(http.StatusInternalServerError) 235 return 236 } 237 238 if res2.Status.Code != rpc.Code_CODE_OK { 239 logger.Println(ctx, res2.Status) 240 w.WriteHeader(http.StatusInternalServerError) 241 return 242 } 243 244 sessID := res2.SessionId 245 logger.Build().Str("sessID", sessID).Msg(ctx, "got write session id") 246 247 stream, err := client.Write(ctx) 248 if err != nil { 249 logger.Error(ctx, err) 250 w.WriteHeader(http.StatusInternalServerError) 251 return 252 } 253 254 buffer := make([]byte, 1024*1024*3) 255 var offset uint64 256 var numChunks uint64 257 258 for { 259 n, err := fd.Read(buffer) 260 if n > 0 { 261 req := &provider.WriteRequest{Data: buffer, Length: uint64(n), SessionId: sessID, Offset: offset} 262 err = stream.Send(req) 263 if err != nil { 264 logger.Error(ctx, err) 265 w.WriteHeader(http.StatusInternalServerError) 266 return 267 } 268 269 numChunks++ 270 offset += uint64(n) 271 } 272 273 if err == io.EOF { 274 break 275 } 276 277 if err != nil { 278 logger.Error(ctx, err) 279 w.WriteHeader(http.StatusInternalServerError) 280 return 281 } 282 } 283 284 res3, err := stream.CloseAndRecv() 285 if err != nil { 286 logger.Error(ctx, err) 287 w.WriteHeader(http.StatusInternalServerError) 288 return 289 } 290 291 if res3.Status.Code != rpc.Code_CODE_OK { 292 logger.Println(ctx, err) 293 w.WriteHeader(http.StatusInternalServerError) 294 return 295 } 296 297 req4 := 
&provider.FinishWriteSessionRequest{Filename: chunkInfo.path, SessionId: sessID} 298 res4, err := client.FinishWriteSession(ctx, req4) 299 if err != nil { 300 logger.Error(ctx, err) 301 w.WriteHeader(http.StatusInternalServerError) 302 return 303 } 304 305 if res4.Status.Code != rpc.Code_CODE_OK { 306 logger.Println(ctx, res4.Status) 307 w.WriteHeader(http.StatusInternalServerError) 308 return 309 } 310 311 req.Filename = chunkInfo.path 312 res, err = client.Stat(ctx, req) 313 if err != nil { 314 logger.Error(ctx, err) 315 w.WriteHeader(http.StatusInternalServerError) 316 return 317 } 318 319 if res.Status.Code != rpc.Code_CODE_OK { 320 logger.Println(ctx, res.Status) 321 w.WriteHeader(http.StatusInternalServerError) 322 return 323 } 324 325 md2 := res.Metadata 326 327 w.Header().Add("Content-Type", md2.Mime) 328 w.Header().Set("ETag", md2.Etag) 329 w.Header().Set("OC-FileId", md2.Id) 330 w.Header().Set("OC-ETag", md2.Etag) 331 t := time.Unix(int64(md2.Mtime), 0) 332 lastModifiedString := t.Format(time.RFC1123Z) 333 w.Header().Set("Last-Modified", lastModifiedString) 334 w.Header().Set("X-OC-MTime", "accepted") 335 336 if md == nil { 337 w.WriteHeader(http.StatusCreated) 338 return 339 } 340 341 w.WriteHeader(http.StatusNoContent) 342 return 343 */ 344 }