github.com/cs3org/reva/v2@v2.27.7/pkg/storage/fs/cephfs/chunking.go (about)

     1  // Copyright 2018-2021 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  //go:build ceph
    20  // +build ceph
    21  
    22  package cephfs
    23  
    24  import (
    25  	"context"
    26  	"fmt"
    27  	"io"
    28  	"os"
    29  	"path/filepath"
    30  	"regexp"
    31  	"strconv"
    32  	"strings"
    33  	"time"
    34  
    35  	cephfs2 "github.com/ceph/go-ceph/cephfs"
    36  	"github.com/google/uuid"
    37  )
    38  
    39  // IsChunked checks if a given path refers to a chunk or not
    40  func IsChunked(fn string) (bool, error) {
    41  	// FIXME: also need to check whether the OC-Chunked header is set
    42  	return regexp.MatchString(`-chunking-\w+-[0-9]+-[0-9]+$`, fn)
    43  }
    44  
    45  // ChunkBLOBInfo stores info about a particular chunk
    46  type ChunkBLOBInfo struct {
    47  	Path         string
    48  	TransferID   string
    49  	TotalChunks  int
    50  	CurrentChunk int
    51  }
    52  
    53  // Not using the resource path in the chunk folder name allows uploading to
    54  // the same folder after a move without having to restart the chunk upload
    55  func (c *ChunkBLOBInfo) uploadID() string {
    56  	return fmt.Sprintf("chunking-%s-%d", c.TransferID, c.TotalChunks)
    57  }
    58  
    59  // GetChunkBLOBInfo decodes a chunk name to retrieve info about it.
    60  func GetChunkBLOBInfo(path string) (*ChunkBLOBInfo, error) {
    61  	parts := strings.Split(path, "-chunking-")
    62  	tail := strings.Split(parts[1], "-")
    63  
    64  	totalChunks, err := strconv.Atoi(tail[1])
    65  	if err != nil {
    66  		return nil, err
    67  	}
    68  
    69  	currentChunk, err := strconv.Atoi(tail[2])
    70  	if err != nil {
    71  		return nil, err
    72  	}
    73  	if currentChunk >= totalChunks {
    74  		return nil, fmt.Errorf("current chunk:%d exceeds total number of chunks:%d", currentChunk, totalChunks)
    75  	}
    76  
    77  	return &ChunkBLOBInfo{
    78  		Path:         parts[0],
    79  		TransferID:   tail[0],
    80  		TotalChunks:  totalChunks,
    81  		CurrentChunk: currentChunk,
    82  	}, nil
    83  }
    84  
// ChunkHandler manages chunked uploads, storing the chunks in a temporary directory
// until it gets the final chunk which is then returned.
type ChunkHandler struct {
	// user is the cephfs user on whose behalf mount operations are executed.
	user *User
	// chunkFolder is the upload folder (taken from fs.conf.UploadFolder) that
	// holds temporary chunk files and per-upload chunk directories.
	chunkFolder string
}
    91  
    92  // NewChunkHandler creates a handler for chunked uploads.
    93  func NewChunkHandler(ctx context.Context, fs *cephfs) *ChunkHandler {
    94  	return &ChunkHandler{fs.makeUser(ctx), fs.conf.UploadFolder}
    95  }
    96  
    97  func (c *ChunkHandler) getChunkTempFileName() string {
    98  	return fmt.Sprintf("__%d_%s", time.Now().Unix(), uuid.New().String())
    99  }
   100  
   101  func (c *ChunkHandler) getChunkFolderName(i *ChunkBLOBInfo) (path string, err error) {
   102  	path = filepath.Join(c.chunkFolder, i.uploadID())
   103  	c.user.op(func(cv *cacheVal) {
   104  		err = cv.mount.MakeDir(path, 0777)
   105  	})
   106  
   107  	return
   108  }
   109  
   110  func (c *ChunkHandler) saveChunk(path string, r io.ReadCloser) (finish bool, chunk string, err error) {
   111  	var chunkInfo *ChunkBLOBInfo
   112  
   113  	chunkInfo, err = GetChunkBLOBInfo(path)
   114  	if err != nil {
   115  		err = fmt.Errorf("error getting chunk info from path: %s", path)
   116  		return
   117  	}
   118  
   119  	chunkTempFilename := c.getChunkTempFileName()
   120  	c.user.op(func(cv *cacheVal) {
   121  		var tmpFile *cephfs2.File
   122  		target := filepath.Join(c.chunkFolder, chunkTempFilename)
   123  		tmpFile, err = cv.mount.Open(target, os.O_CREATE|os.O_WRONLY, filePermDefault)
   124  		defer closeFile(tmpFile)
   125  		if err != nil {
   126  			return
   127  		}
   128  		_, err = io.Copy(tmpFile, r)
   129  	})
   130  	if err != nil {
   131  		return
   132  	}
   133  
   134  	chunksFolderName, err := c.getChunkFolderName(chunkInfo)
   135  	if err != nil {
   136  		return
   137  	}
   138  	// c.logger.Info().Log("chunkfolder", chunksFolderName)
   139  
   140  	chunkTarget := filepath.Join(chunksFolderName, strconv.Itoa(chunkInfo.CurrentChunk))
   141  	c.user.op(func(cv *cacheVal) {
   142  		err = cv.mount.Rename(chunkTempFilename, chunkTarget)
   143  	})
   144  	if err != nil {
   145  		return
   146  	}
   147  
   148  	// Check that all chunks are uploaded.
   149  	// This is very inefficient, the server has to check that it has all the
   150  	// chunks after each uploaded chunk.
   151  	// A two-phase upload like DropBox is better, because the server will
   152  	// assembly the chunks when the client asks for it.
   153  	numEntries := 0
   154  	c.user.op(func(cv *cacheVal) {
   155  		var dir *cephfs2.Directory
   156  		var entry *cephfs2.DirEntry
   157  		var chunkFile, assembledFile *cephfs2.File
   158  
   159  		dir, err = cv.mount.OpenDir(chunksFolderName)
   160  		defer closeDir(dir)
   161  
   162  		for entry, err = dir.ReadDir(); entry != nil && err == nil; entry, err = dir.ReadDir() {
   163  			numEntries++
   164  		}
   165  		// to remove . and ..
   166  		numEntries -= 2
   167  
   168  		if err != nil || numEntries < chunkInfo.TotalChunks {
   169  			return
   170  		}
   171  
   172  		chunk = filepath.Join(c.chunkFolder, c.getChunkTempFileName())
   173  		assembledFile, err = cv.mount.Open(chunk, os.O_CREATE|os.O_WRONLY, filePermDefault)
   174  		defer closeFile(assembledFile)
   175  		defer deleteFile(cv.mount, chunk)
   176  		if err != nil {
   177  			return
   178  		}
   179  
   180  		for i := 0; i < numEntries; i++ {
   181  			target := filepath.Join(chunksFolderName, strconv.Itoa(i))
   182  
   183  			chunkFile, err = cv.mount.Open(target, os.O_RDONLY, 0)
   184  			if err != nil {
   185  				return
   186  			}
   187  			_, err = io.Copy(assembledFile, chunkFile)
   188  			closeFile(chunkFile)
   189  			if err != nil {
   190  				return
   191  			}
   192  		}
   193  
   194  		// necessary approach in case assembly fails
   195  		for i := 0; i < numEntries; i++ {
   196  			target := filepath.Join(chunksFolderName, strconv.Itoa(i))
   197  			err = cv.mount.Unlink(target)
   198  			if err != nil {
   199  				return
   200  			}
   201  		}
   202  		_ = cv.mount.Unlink(chunksFolderName)
   203  	})
   204  
   205  	return true, chunk, nil
   206  }
   207  
   208  // WriteChunk saves an intermediate chunk temporarily and assembles all chunks
   209  // once the final one is received.
   210  func (c *ChunkHandler) WriteChunk(fn string, r io.ReadCloser) (string, string, error) {
   211  	finish, chunk, err := c.saveChunk(fn, r)
   212  	if err != nil {
   213  		return "", "", err
   214  	}
   215  
   216  	if !finish {
   217  		return "", "", nil
   218  	}
   219  
   220  	chunkInfo, err := GetChunkBLOBInfo(fn)
   221  	if err != nil {
   222  		return "", "", err
   223  	}
   224  
   225  	return chunkInfo.Path, chunk, nil
   226  
   227  	// TODO(labkode): implement old chunking
   228  
   229  	/*
   230  		req2 := &provider.StartWriteSessionRequest{}
   231  		res2, err := client.StartWriteSession(ctx, req2)
   232  		if err != nil {
   233  			logger.Error(ctx, err)
   234  			w.WriteHeader(http.StatusInternalServerError)
   235  			return
   236  		}
   237  
   238  		if res2.Status.Code != rpc.Code_CODE_OK {
   239  			logger.Println(ctx, res2.Status)
   240  			w.WriteHeader(http.StatusInternalServerError)
   241  			return
   242  		}
   243  
   244  		sessID := res2.SessionId
   245  		logger.Build().Str("sessID", sessID).Msg(ctx, "got write session id")
   246  
   247  		stream, err := client.Write(ctx)
   248  		if err != nil {
   249  			logger.Error(ctx, err)
   250  			w.WriteHeader(http.StatusInternalServerError)
   251  			return
   252  		}
   253  
   254  		buffer := make([]byte, 1024*1024*3)
   255  		var offset uint64
   256  		var numChunks uint64
   257  
   258  		for {
   259  			n, err := fd.Read(buffer)
   260  			if n > 0 {
   261  				req := &provider.WriteRequest{Data: buffer, Length: uint64(n), SessionId: sessID, Offset: offset}
   262  				err = stream.Send(req)
   263  				if err != nil {
   264  					logger.Error(ctx, err)
   265  					w.WriteHeader(http.StatusInternalServerError)
   266  					return
   267  				}
   268  
   269  				numChunks++
   270  				offset += uint64(n)
   271  			}
   272  
   273  			if err == io.EOF {
   274  				break
   275  			}
   276  
   277  			if err != nil {
   278  				logger.Error(ctx, err)
   279  				w.WriteHeader(http.StatusInternalServerError)
   280  				return
   281  			}
   282  		}
   283  
   284  		res3, err := stream.CloseAndRecv()
   285  		if err != nil {
   286  			logger.Error(ctx, err)
   287  			w.WriteHeader(http.StatusInternalServerError)
   288  			return
   289  		}
   290  
   291  		if res3.Status.Code != rpc.Code_CODE_OK {
   292  			logger.Println(ctx, err)
   293  			w.WriteHeader(http.StatusInternalServerError)
   294  			return
   295  		}
   296  
   297  		req4 := &provider.FinishWriteSessionRequest{Filename: chunkInfo.path, SessionId: sessID}
   298  		res4, err := client.FinishWriteSession(ctx, req4)
   299  		if err != nil {
   300  			logger.Error(ctx, err)
   301  			w.WriteHeader(http.StatusInternalServerError)
   302  			return
   303  		}
   304  
   305  		if res4.Status.Code != rpc.Code_CODE_OK {
   306  			logger.Println(ctx, res4.Status)
   307  			w.WriteHeader(http.StatusInternalServerError)
   308  			return
   309  		}
   310  
   311  		req.Filename = chunkInfo.path
   312  		res, err = client.Stat(ctx, req)
   313  		if err != nil {
   314  			logger.Error(ctx, err)
   315  			w.WriteHeader(http.StatusInternalServerError)
   316  			return
   317  		}
   318  
   319  		if res.Status.Code != rpc.Code_CODE_OK {
   320  			logger.Println(ctx, res.Status)
   321  			w.WriteHeader(http.StatusInternalServerError)
   322  			return
   323  		}
   324  
   325  		md2 := res.Metadata
   326  
   327  		w.Header().Add("Content-Type", md2.Mime)
   328  		w.Header().Set("ETag", md2.Etag)
   329  		w.Header().Set("OC-FileId", md2.Id)
   330  		w.Header().Set("OC-ETag", md2.Etag)
   331  		t := time.Unix(int64(md2.Mtime), 0)
   332  		lastModifiedString := t.Format(time.RFC1123Z)
   333  		w.Header().Set("Last-Modified", lastModifiedString)
   334  		w.Header().Set("X-OC-MTime", "accepted")
   335  
   336  		if md == nil {
   337  			w.WriteHeader(http.StatusCreated)
   338  			return
   339  		}
   340  
   341  		w.WriteHeader(http.StatusNoContent)
   342  		return
   343  	*/
   344  }