github.com/cs3org/reva/v2@v2.27.7/pkg/storage/utils/chunking/chunking.go (about)

     1  // Copyright 2018-2021 CERN
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // In applying this license, CERN does not waive the privileges and immunities
    16  // granted to it by virtue of its status as an Intergovernmental Organization
    17  // or submit itself to any jurisdiction.
    18  
    19  package chunking
    20  
    21  import (
    22  	"fmt"
    23  	"io"
    24  	"os"
    25  	"path/filepath"
    26  	"regexp"
    27  	"strconv"
    28  	"strings"
    29  )
    30  
    31  var (
    32  	chunkingPathRE = regexp.MustCompile(`-chunking-\w+-[0-9]+-[0-9]+$`)
    33  )
    34  
    35  // IsChunked checks if a given path refers to a chunk or not
    36  func IsChunked(fn string) bool {
    37  	// FIXME: also need to check whether the OC-Chunked header is set
    38  	return chunkingPathRE.MatchString(fn)
    39  }
    40  
    41  // ChunkBLOBInfo stores info about a particular chunk
    42  type ChunkBLOBInfo struct {
    43  	Path         string
    44  	TransferID   string
    45  	TotalChunks  int
    46  	CurrentChunk int
    47  }
    48  
    49  // Not using the resource path in the chunk folder name allows uploading to
    50  // the same folder after a move without having to restart the chunk upload
    51  func (c *ChunkBLOBInfo) uploadID() string {
    52  	return fmt.Sprintf("chunking-%s-%d", c.TransferID, c.TotalChunks)
    53  }
    54  
    55  // GetChunkBLOBInfo decodes a chunk name to retrieve info about it.
    56  func GetChunkBLOBInfo(path string) (*ChunkBLOBInfo, error) {
    57  	parts := strings.Split(path, "-chunking-")
    58  	tail := strings.Split(parts[1], "-")
    59  
    60  	totalChunks, err := strconv.Atoi(tail[1])
    61  	if err != nil {
    62  		return nil, err
    63  	}
    64  
    65  	currentChunk, err := strconv.Atoi(tail[2])
    66  	if err != nil {
    67  		return nil, err
    68  	}
    69  	if currentChunk >= totalChunks {
    70  		return nil, fmt.Errorf("current chunk:%d exceeds total number of chunks:%d", currentChunk, totalChunks)
    71  	}
    72  
    73  	return &ChunkBLOBInfo{
    74  		Path:         parts[0],
    75  		TransferID:   tail[0],
    76  		TotalChunks:  totalChunks,
    77  		CurrentChunk: currentChunk,
    78  	}, nil
    79  }
    80  
    81  // ChunkHandler manages chunked uploads, storing the chunks in a temporary directory
    82  // until it gets the final chunk which is then returned.
    83  type ChunkHandler struct {
    84  	ChunkFolder string `mapstructure:"chunk_folder"`
    85  }
    86  
    87  // NewChunkHandler creates a handler for chunked uploads.
    88  func NewChunkHandler(chunkFolder string) *ChunkHandler {
    89  	return &ChunkHandler{chunkFolder}
    90  }
    91  
    92  func (c *ChunkHandler) createChunkTempFile() (string, *os.File, error) {
    93  	file, err := os.CreateTemp(fmt.Sprintf("/%s", c.ChunkFolder), "")
    94  	if err != nil {
    95  		return "", nil, err
    96  	}
    97  
    98  	return file.Name(), file, nil
    99  }
   100  
   101  func (c *ChunkHandler) getChunkFolderName(i *ChunkBLOBInfo) (string, error) {
   102  	path := filepath.Join("/", c.ChunkFolder, filepath.Join("/", i.uploadID()))
   103  	if err := os.MkdirAll(path, 0755); err != nil {
   104  		return "", err
   105  	}
   106  	return path, nil
   107  }
   108  
   109  func (c *ChunkHandler) saveChunk(path string, r io.ReadCloser) (bool, string, error) {
   110  	chunkInfo, err := GetChunkBLOBInfo(path)
   111  	if err != nil {
   112  		err := fmt.Errorf("error getting chunk info from path: %s", path)
   113  		return false, "", err
   114  	}
   115  
   116  	chunkTempFilename, chunkTempFile, err := c.createChunkTempFile()
   117  	if err != nil {
   118  		return false, "", err
   119  	}
   120  	defer chunkTempFile.Close()
   121  
   122  	if _, err := io.Copy(chunkTempFile, r); err != nil {
   123  		return false, "", err
   124  	}
   125  
   126  	// force close of the file here because if it is the last chunk to
   127  	// assemble the big file we must have all the chunks already closed.
   128  	if err = chunkTempFile.Close(); err != nil {
   129  		return false, "", err
   130  	}
   131  
   132  	chunksFolderName, err := c.getChunkFolderName(chunkInfo)
   133  	if err != nil {
   134  		return false, "", err
   135  	}
   136  	// c.logger.Info().Log("chunkfolder", chunksFolderName)
   137  
   138  	chunkTarget := filepath.Join(chunksFolderName, strconv.Itoa(chunkInfo.CurrentChunk))
   139  	if err = os.Rename(chunkTempFilename, chunkTarget); err != nil {
   140  		return false, "", err
   141  	}
   142  
   143  	// Check that all chunks are uploaded.
   144  	// This is very inefficient, the server has to check that it has all the
   145  	// chunks after each uploaded chunk.
   146  	// A two-phase upload like DropBox is better, because the server will
   147  	// assembly the chunks when the client asks for it.
   148  	chunksFolder, err := os.Open(chunksFolderName)
   149  	if err != nil {
   150  		return false, "", err
   151  	}
   152  	defer chunksFolder.Close()
   153  
   154  	// read all the chunks inside the chunk folder; -1 == all
   155  	chunks, err := chunksFolder.Readdir(-1)
   156  	if err != nil {
   157  		return false, "", err
   158  	}
   159  
   160  	// there are still some chunks to be uploaded.
   161  	// we return CodeUploadIsPartial to notify upper layers that the upload is still
   162  	// not complete and requires more actions.
   163  	// This code is needed to notify the owncloud webservice that the upload has not yet been
   164  	// completed and needs to continue uploading chunks.
   165  	if len(chunks) < chunkInfo.TotalChunks {
   166  		return false, "", nil
   167  	}
   168  
   169  	assembledFileName, assembledFile, err := c.createChunkTempFile()
   170  	if err != nil {
   171  		return false, "", err
   172  	}
   173  	defer assembledFile.Close()
   174  
   175  	// walk all chunks and append to assembled file
   176  	for i := range chunks {
   177  		target := filepath.Join(chunksFolderName, strconv.Itoa(i))
   178  
   179  		chunk, err := os.Open(target)
   180  		if err != nil {
   181  			return false, "", err
   182  		}
   183  		defer chunk.Close()
   184  
   185  		if _, err = io.Copy(assembledFile, chunk); err != nil {
   186  			return false, "", err
   187  		}
   188  
   189  		// we close the chunk here because if the assembled file contains hundreds of chunks
   190  		// we will end up with hundreds of open file descriptors
   191  		if err = chunk.Close(); err != nil {
   192  			return false, "", err
   193  
   194  		}
   195  	}
   196  
   197  	// at this point the assembled file is complete
   198  	// so we free space removing the chunks folder
   199  	defer os.RemoveAll(chunksFolderName)
   200  
   201  	return true, assembledFileName, nil
   202  }
   203  
   204  // WriteChunk saves an intermediate chunk temporarily and assembles all chunks
   205  // once the final one is received.
   206  func (c *ChunkHandler) WriteChunk(fn string, r io.ReadCloser) (string, string, error) {
   207  	finish, chunk, err := c.saveChunk(fn, r)
   208  	if err != nil {
   209  		return "", "", err
   210  	}
   211  
   212  	if !finish {
   213  		return "", "", nil
   214  	}
   215  
   216  	chunkInfo, err := GetChunkBLOBInfo(fn)
   217  	if err != nil {
   218  		return "", "", err
   219  	}
   220  
   221  	return chunkInfo.Path, chunk, nil
   222  
   223  	// TODO(labkode): implement old chunking
   224  
   225  	/*
   226  		req2 := &provider.StartWriteSessionRequest{}
   227  		res2, err := client.StartWriteSession(ctx, req2)
   228  		if err != nil {
   229  			logger.Error(ctx, err)
   230  			w.WriteHeader(http.StatusInternalServerError)
   231  			return
   232  		}
   233  
   234  		if res2.Status.Code != rpc.Code_CODE_OK {
   235  			logger.Println(ctx, res2.Status)
   236  			w.WriteHeader(http.StatusInternalServerError)
   237  			return
   238  		}
   239  
   240  		sessID := res2.SessionId
   241  		logger.Build().Str("sessID", sessID).Msg(ctx, "got write session id")
   242  
   243  		stream, err := client.Write(ctx)
   244  		if err != nil {
   245  			logger.Error(ctx, err)
   246  			w.WriteHeader(http.StatusInternalServerError)
   247  			return
   248  		}
   249  
   250  		buffer := make([]byte, 1024*1024*3)
   251  		var offset uint64
   252  		var numChunks uint64
   253  
   254  		for {
   255  			n, err := fd.Read(buffer)
   256  			if n > 0 {
   257  				req := &provider.WriteRequest{Data: buffer, Length: uint64(n), SessionId: sessID, Offset: offset}
   258  				err = stream.Send(req)
   259  				if err != nil {
   260  					logger.Error(ctx, err)
   261  					w.WriteHeader(http.StatusInternalServerError)
   262  					return
   263  				}
   264  
   265  				numChunks++
   266  				offset += uint64(n)
   267  			}
   268  
   269  			if err == io.EOF {
   270  				break
   271  			}
   272  
   273  			if err != nil {
   274  				logger.Error(ctx, err)
   275  				w.WriteHeader(http.StatusInternalServerError)
   276  				return
   277  			}
   278  		}
   279  
   280  		res3, err := stream.CloseAndRecv()
   281  		if err != nil {
   282  			logger.Error(ctx, err)
   283  			w.WriteHeader(http.StatusInternalServerError)
   284  			return
   285  		}
   286  
   287  		if res3.Status.Code != rpc.Code_CODE_OK {
   288  			logger.Println(ctx, err)
   289  			w.WriteHeader(http.StatusInternalServerError)
   290  			return
   291  		}
   292  
   293  		req4 := &provider.FinishWriteSessionRequest{Filename: chunkInfo.path, SessionId: sessID}
   294  		res4, err := client.FinishWriteSession(ctx, req4)
   295  		if err != nil {
   296  			logger.Error(ctx, err)
   297  			w.WriteHeader(http.StatusInternalServerError)
   298  			return
   299  		}
   300  
   301  		if res4.Status.Code != rpc.Code_CODE_OK {
   302  			logger.Println(ctx, res4.Status)
   303  			w.WriteHeader(http.StatusInternalServerError)
   304  			return
   305  		}
   306  
   307  		req.Filename = chunkInfo.path
   308  		res, err = client.Stat(ctx, req)
   309  		if err != nil {
   310  			logger.Error(ctx, err)
   311  			w.WriteHeader(http.StatusInternalServerError)
   312  			return
   313  		}
   314  
   315  		if res.Status.Code != rpc.Code_CODE_OK {
   316  			logger.Println(ctx, res.Status)
   317  			w.WriteHeader(http.StatusInternalServerError)
   318  			return
   319  		}
   320  
   321  		md2 := res.Metadata
   322  
   323  		w.Header().Add("Content-Type", md2.Mime)
   324  		w.Header().Set("ETag", md2.Etag)
   325  		w.Header().Set("OC-FileId", md2.Id)
   326  		w.Header().Set("OC-ETag", md2.Etag)
   327  		t := time.Unix(int64(md2.Mtime), 0)
   328  		lastModifiedString := t.Format(time.RFC1123Z)
   329  		w.Header().Set("Last-Modified", lastModifiedString)
   330  		w.Header().Set("X-OC-MTime", "accepted")
   331  
   332  		if md == nil {
   333  			w.WriteHeader(http.StatusCreated)
   334  			return
   335  		}
   336  
   337  		w.WriteHeader(http.StatusNoContent)
   338  		return
   339  	*/
   340  }