github.com/jstaf/onedriver@v0.14.2-0.20240420231225-f07678f9e6ef/fs/upload_session.go (about)

     1  package fs
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"errors"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"math"
    10  	"net/http"
    11  	"net/url"
    12  	"strconv"
    13  	"strings"
    14  	"sync"
    15  	"time"
    16  
    17  	"github.com/jstaf/onedriver/fs/graph"
    18  	"github.com/rs/zerolog/log"
    19  )
    20  
    21  const (
    22  	// 10MB is the recommended upload size according to the graph API docs
    23  	uploadChunkSize uint64 = 10 * 1024 * 1024
    24  
    25  	// uploads larget than 4MB must use a formal upload session
    26  	uploadLargeSize uint64 = 4 * 1024 * 1024
    27  )
    28  
    29  // upload states
    30  const (
    31  	uploadNotStarted = iota
    32  	uploadStarted
    33  	uploadComplete
    34  	uploadErrored
    35  )
    36  
    37  // UploadSession contains a snapshot of the file we're uploading. We have to
    38  // take the snapshot or the file may have changed on disk during upload (which
    39  // would break the upload). It is not recommended to directly deserialize into
    40  // this structure from API responses in case Microsoft ever adds a size, data,
    41  // or modTime field to the response.
    42  type UploadSession struct {
    43  	ID                 string    `json:"id"`
    44  	OldID              string    `json:"oldID"`
    45  	ParentID           string    `json:"parentID"`
    46  	NodeID             uint64    `json:"nodeID"`
    47  	Name               string    `json:"name"`
    48  	ExpirationDateTime time.Time `json:"expirationDateTime"`
    49  	Size               uint64    `json:"size,omitempty"`
    50  	Data               []byte    `json:"data,omitempty"`
    51  	QuickXORHash       string    `json:"quickxorhash,omitempty"`
    52  	ModTime            time.Time `json:"modTime,omitempty"`
    53  	retries            int
    54  
    55  	sync.Mutex
    56  	UploadURL string `json:"uploadUrl"`
    57  	ETag      string `json:"eTag,omitempty"`
    58  	state     int
    59  	error     // embedded error tracks errors that killed an upload
    60  }
    61  
    62  // MarshalJSON implements a custom JSON marshaler to avoid race conditions
    63  func (u *UploadSession) MarshalJSON() ([]byte, error) {
    64  	u.Lock()
    65  	defer u.Unlock()
    66  	type SerializeableUploadSession UploadSession
    67  	return json.Marshal((*SerializeableUploadSession)(u))
    68  }
    69  
    70  // UploadSessionPost is the initial post used to create an upload session
    71  type UploadSessionPost struct {
    72  	Name             string `json:"name,omitempty"`
    73  	ConflictBehavior string `json:"@microsoft.graph.conflictBehavior,omitempty"`
    74  	FileSystemInfo   `json:"fileSystemInfo,omitempty"`
    75  }
    76  
    77  // FileSystemInfo carries the filesystem metadata like Mtime/Atime
    78  type FileSystemInfo struct {
    79  	LastModifiedDateTime time.Time `json:"lastModifiedDateTime,omitempty"`
    80  }
    81  
    82  func (u *UploadSession) getState() int {
    83  	u.Lock()
    84  	defer u.Unlock()
    85  	return u.state
    86  }
    87  
    88  // setState is just a helper method to set the UploadSession state and make error checking
    89  // a little more straightforwards.
    90  func (u *UploadSession) setState(state int, err error) error {
    91  	u.Lock()
    92  	u.state = state
    93  	u.error = err
    94  	u.Unlock()
    95  	return err
    96  }
    97  
    98  // NewUploadSession wraps an upload of a file into an UploadSession struct
    99  // responsible for performing uploads for a file.
   100  func NewUploadSession(inode *Inode, data *[]byte) (*UploadSession, error) {
   101  	if data == nil {
   102  		return nil, errors.New("data to upload cannot be nil")
   103  	}
   104  
   105  	// create a generic session for all files
   106  	inode.RLock()
   107  	session := UploadSession{
   108  		ID:       inode.DriveItem.ID,
   109  		OldID:    inode.DriveItem.ID,
   110  		ParentID: inode.DriveItem.Parent.ID,
   111  		NodeID:   inode.nodeID,
   112  		Name:     inode.DriveItem.Name,
   113  		Data:     *data,
   114  		ModTime:  *inode.DriveItem.ModTime,
   115  	}
   116  	inode.RUnlock()
   117  
   118  	session.Size = uint64(len(*data)) // just in case it somehow differs
   119  	session.QuickXORHash = graph.QuickXORHash(data)
   120  	return &session, nil
   121  }
   122  
   123  // cancel the upload session by deleting the temp file at the endpoint.
   124  func (u *UploadSession) cancel(auth *graph.Auth) {
   125  	u.Lock()
   126  	// small upload sessions will also have an empty UploadURL in addition to
   127  	// uninitialized large file uploads.
   128  	nonemptyURL := u.UploadURL != ""
   129  	u.Unlock()
   130  	if nonemptyURL {
   131  		state := u.getState()
   132  		if state == uploadStarted || state == uploadErrored {
   133  			// dont care about result, this is purely us being polite to the server
   134  			go graph.Delete(u.UploadURL, auth)
   135  		}
   136  	}
   137  }
   138  
   139  // Internal method used for uploading individual chunks of a DriveItem. We have
   140  // to make things this way because the internal Put func doesn't work all that
   141  // well when we need to add custom headers. Will return without an error if
   142  // irrespective of HTTP status (errors are reserved for stuff that prevented
   143  // the HTTP request at all).
   144  func (u *UploadSession) uploadChunk(auth *graph.Auth, offset uint64) ([]byte, int, error) {
   145  	u.Lock()
   146  	url := u.UploadURL
   147  	if url == "" {
   148  		u.Unlock()
   149  		return nil, -1, errors.New("UploadSession UploadURL cannot be empty")
   150  	}
   151  	u.Unlock()
   152  
   153  	// how much of the file are we going to upload?
   154  	end := offset + uploadChunkSize
   155  	var reqChunkSize uint64
   156  	if end > u.Size {
   157  		end = u.Size
   158  		reqChunkSize = end - offset + 1
   159  	}
   160  	if offset > u.Size {
   161  		return nil, -1, errors.New("offset cannot be larger than DriveItem size")
   162  	}
   163  
   164  	auth.Refresh()
   165  
   166  	client := &http.Client{}
   167  	request, _ := http.NewRequest(
   168  		"PUT",
   169  		url,
   170  		bytes.NewReader((u.Data)[offset:end]),
   171  	)
   172  	// no Authorization header - it will throw a 401 if present
   173  	request.Header.Add("Content-Length", strconv.Itoa(int(reqChunkSize)))
   174  	frags := fmt.Sprintf("bytes %d-%d/%d", offset, end-1, u.Size)
   175  	log.Info().Str("id", u.ID).Msg("Uploading " + frags)
   176  	request.Header.Add("Content-Range", frags)
   177  
   178  	resp, err := client.Do(request)
   179  	if err != nil {
   180  		// this is a serious error, not simply one with a non-200 return code
   181  		return nil, -1, err
   182  	}
   183  	defer resp.Body.Close()
   184  	response, _ := ioutil.ReadAll(resp.Body)
   185  	return response, resp.StatusCode, nil
   186  }
   187  
   188  // Upload copies the file's contents to the server. Should only be called as a
   189  // goroutine, or it can potentially block for a very long time. The uploadSession.error
   190  // field contains errors to be handled if called as a goroutine.
   191  func (u *UploadSession) Upload(auth *graph.Auth) error {
   192  	log.Info().Str("id", u.ID).Str("name", u.Name).Msg("Uploading file.")
   193  	u.setState(uploadStarted, nil)
   194  
   195  	var uploadPath string
   196  	var resp []byte
   197  	if u.Size < uploadLargeSize {
   198  		// Small upload sessions use a simple PUT request, but this does not support
   199  		// adding file modification times. We don't really care though, because
   200  		// after some experimentation, the Microsoft API doesn't seem to properly
   201  		// support these either (this is why we have to use etags).
   202  		if isLocalID(u.ID) {
   203  			uploadPath = fmt.Sprintf(
   204  				"/me/drive/items/%s:/%s:/content",
   205  				url.PathEscape(u.ParentID),
   206  				url.PathEscape(u.Name),
   207  			)
   208  		} else {
   209  			uploadPath = fmt.Sprintf(
   210  				"/me/drive/items/%s/content",
   211  				url.PathEscape(u.ID),
   212  			)
   213  		}
   214  		// small files handled in this block
   215  		var err error
   216  		resp, err = graph.Put(uploadPath, auth, bytes.NewReader(u.Data))
   217  		if err != nil && strings.Contains(err.Error(), "resourceModified") {
   218  			// retry the request after a second, likely the server is having issues
   219  			time.Sleep(time.Second)
   220  			resp, err = graph.Put(uploadPath, auth, bytes.NewReader(u.Data))
   221  		}
   222  		if err != nil {
   223  			return u.setState(uploadErrored, fmt.Errorf("small upload failed: %w", err))
   224  		}
   225  	} else {
   226  		if isLocalID(u.ID) {
   227  			uploadPath = fmt.Sprintf(
   228  				"/me/drive/items/%s:/%s:/createUploadSession",
   229  				url.PathEscape(u.ParentID),
   230  				url.PathEscape(u.Name),
   231  			)
   232  		} else {
   233  			uploadPath = fmt.Sprintf(
   234  				"/me/drive/items/%s/createUploadSession",
   235  				url.PathEscape(u.ID),
   236  			)
   237  		}
   238  		sessionPostData, _ := json.Marshal(UploadSessionPost{
   239  			ConflictBehavior: "replace",
   240  			FileSystemInfo: FileSystemInfo{
   241  				LastModifiedDateTime: u.ModTime,
   242  			},
   243  		})
   244  		resp, err := graph.Post(uploadPath, auth, bytes.NewReader(sessionPostData))
   245  		if err != nil {
   246  			return u.setState(uploadErrored, fmt.Errorf("failed to create upload session: %w", err))
   247  		}
   248  
   249  		// populate UploadURL/expiration - we unmarshal into a fresh session here
   250  		// just in case the API does something silly at a later date and overwrites
   251  		// a field it shouldn't.
   252  		tmp := UploadSession{}
   253  		if err = json.Unmarshal(resp, &tmp); err != nil {
   254  			return u.setState(uploadErrored,
   255  				fmt.Errorf("could not unmarshal upload session post response: %w", err))
   256  		}
   257  		u.Lock()
   258  		u.UploadURL = tmp.UploadURL
   259  		u.ExpirationDateTime = tmp.ExpirationDateTime
   260  		u.Unlock()
   261  
   262  		// api upload session created successfully, now do actual content upload
   263  		var status int
   264  		nchunks := int(math.Ceil(float64(u.Size) / float64(uploadChunkSize)))
   265  		for i := 0; i < nchunks; i++ {
   266  			resp, status, err = u.uploadChunk(auth, uint64(i)*uploadChunkSize)
   267  			if err != nil {
   268  				return u.setState(uploadErrored, fmt.Errorf("failed to perform chunk upload: %w", err))
   269  			}
   270  
   271  			// retry server-side failures with an exponential back-off strategy. Will not
   272  			// exit this loop unless it receives a non 5xx error or serious failure
   273  			for backoff := 1; status >= 500; backoff *= 2 {
   274  				log.Error().
   275  					Str("id", u.ID).
   276  					Str("name", u.Name).
   277  					Int("chunk", i).
   278  					Int("nchunks", nchunks).
   279  					Int("status", status).
   280  					Msgf("The OneDrive server is having issues, retrying chunk upload in %ds.", backoff)
   281  				time.Sleep(time.Duration(backoff) * time.Second)
   282  				resp, status, err = u.uploadChunk(auth, uint64(i)*uploadChunkSize)
   283  				if err != nil { // a serious, non 4xx/5xx error
   284  					return u.setState(uploadErrored, fmt.Errorf("failed to perform chunk upload: %w", err))
   285  				}
   286  			}
   287  
   288  			// handle client-side errors
   289  			if status >= 400 {
   290  				return u.setState(uploadErrored, fmt.Errorf("error uploading chunk - HTTP %d: %s", status, string(resp)))
   291  			}
   292  		}
   293  	}
   294  
   295  	// server has indicated that the upload was successful - now we check to verify the
   296  	// checksum is what it's supposed to be.
   297  	remote := graph.DriveItem{}
   298  	if err := json.Unmarshal(resp, &remote); err != nil {
   299  		if len(resp) == 0 {
   300  			// the API frequently just returns a 0-byte response for completed
   301  			// multipart uploads, so we manually fetch the newly updated item
   302  			var remotePtr *graph.DriveItem
   303  			if isLocalID(u.ID) {
   304  				remotePtr, err = graph.GetItemChild(u.ParentID, u.Name, auth)
   305  			} else {
   306  				remotePtr, err = graph.GetItem(u.ID, auth)
   307  			}
   308  			if err == nil {
   309  				remote = *remotePtr
   310  			} else {
   311  				return u.setState(uploadErrored,
   312  					fmt.Errorf("failed to get item post-upload: %w", err))
   313  			}
   314  		} else {
   315  			return u.setState(uploadErrored,
   316  				fmt.Errorf("could not unmarshal response: %w: %s", err, string(resp)),
   317  			)
   318  		}
   319  	}
   320  	if remote.File == nil && remote.Size != u.Size {
   321  		// if we are absolutely pounding the microsoft API, a remote item may sometimes
   322  		// come back without checksums, so we check the size of the uploaded item instead.
   323  		return u.setState(uploadErrored, errors.New("size mismatch when remote checksums did not exist"))
   324  	} else if !remote.VerifyChecksum(u.QuickXORHash) {
   325  		return u.setState(uploadErrored, errors.New("remote checksum did not match"))
   326  	}
   327  	// update the UploadSession's ID in the event that we exchange a local for a remote ID
   328  	u.Lock()
   329  	u.ID = remote.ID
   330  	u.ETag = remote.ETag
   331  	u.Unlock()
   332  	return u.setState(uploadComplete, nil)
   333  }