// Source: github.com/jstaf/onedriver@v0.14.2-0.20240420231225-f07678f9e6ef/fs/upload_session.go

package fs

import (
	"bytes"
	"encoding/json"
	"errors"
	"fmt"
	"io/ioutil"
	"math"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/jstaf/onedriver/fs/graph"
	"github.com/rs/zerolog/log"
)

const (
	// 10MB is the recommended upload size according to the graph API docs.
	// It is the size of every chunk of a session upload except possibly the
	// last one.
	uploadChunkSize uint64 = 10 * 1024 * 1024

	// uploads of 4MB or more must use a formal upload session; anything
	// strictly smaller is sent with a single PUT request.
	uploadLargeSize uint64 = 4 * 1024 * 1024
)

// upload states, stored in UploadSession.state
const (
	// uploadNotStarted is the zero value, so a freshly created UploadSession
	// starts out in this state.
	uploadNotStarted = iota
	// uploadStarted is set as soon as Upload begins.
	uploadStarted
	// uploadComplete is set once the uploaded item passed verification.
	uploadComplete
	// uploadErrored is set together with the error that stopped the upload.
	uploadErrored
)

// UploadSession contains a snapshot of the file we're uploading. We have to
// take the snapshot or the file may have changed on disk during upload (which
// would break the upload). It is not recommended to directly deserialize into
// this structure from API responses in case Microsoft ever adds a size, data,
// or modTime field to the response.
type UploadSession struct {
	// Identity of the item being uploaded.
	ID       string `json:"id"`
	OldID    string `json:"oldID"`
	ParentID string `json:"parentID"`
	NodeID   uint64 `json:"nodeID"`
	Name     string `json:"name"`

	ExpirationDateTime time.Time `json:"expirationDateTime"`

	// Snapshot of the content to upload and its metadata.
	Size         uint64    `json:"size,omitempty"`
	Data         []byte    `json:"data,omitempty"`
	QuickXORHash string    `json:"quickxorhash,omitempty"`
	ModTime      time.Time `json:"modTime,omitempty"`
	retries      int

	// The mutex guards the fields below it, which change while an upload is
	// in flight.
	sync.Mutex
	UploadURL string `json:"uploadUrl"`
	ETag      string `json:"eTag,omitempty"`
	state     int
	error     // embedded error tracks errors that killed an upload
}

// MarshalJSON serializes the session while holding its lock, so that a
// concurrent upload cannot modify the session halfway through encoding.
func (u *UploadSession) MarshalJSON() ([]byte, error) {
	// plainSession has the same fields as UploadSession but none of its
	// methods, which keeps json.Marshal from calling MarshalJSON recursively.
	type plainSession UploadSession

	u.Lock()
	defer u.Unlock()
	encoded, err := json.Marshal((*plainSession)(u))
	return encoded, err
}

// UploadSessionPost is the body of the initial POST request that creates an
// upload session.
type UploadSessionPost struct {
	Name             string `json:"name,omitempty"`
	ConflictBehavior string `json:"@microsoft.graph.conflictBehavior,omitempty"`
	FileSystemInfo   `json:"fileSystemInfo,omitempty"`
}

// FileSystemInfo carries filesystem metadata (currently only the last
// modification time) alongside an upload session request.
type FileSystemInfo struct {
	LastModifiedDateTime time.Time `json:"lastModifiedDateTime,omitempty"`
}

// getState returns the current upload state, read under the session lock.
func (u *UploadSession) getState() int {
	u.Lock()
	current := u.state
	u.Unlock()
	return current
}

// setState records the upload state together with the error (possibly nil)
// that led to it. The error is handed straight back so callers can write
// `return u.setState(...)`.
func (u *UploadSession) setState(state int, err error) error {
	u.Lock()
	defer u.Unlock()
	u.state, u.error = state, err
	return err
}

// NewUploadSession wraps an upload of a file into an UploadSession struct
// responsible for performing uploads for a file.
100 func NewUploadSession(inode *Inode, data *[]byte) (*UploadSession, error) { 101 if data == nil { 102 return nil, errors.New("data to upload cannot be nil") 103 } 104 105 // create a generic session for all files 106 inode.RLock() 107 session := UploadSession{ 108 ID: inode.DriveItem.ID, 109 OldID: inode.DriveItem.ID, 110 ParentID: inode.DriveItem.Parent.ID, 111 NodeID: inode.nodeID, 112 Name: inode.DriveItem.Name, 113 Data: *data, 114 ModTime: *inode.DriveItem.ModTime, 115 } 116 inode.RUnlock() 117 118 session.Size = uint64(len(*data)) // just in case it somehow differs 119 session.QuickXORHash = graph.QuickXORHash(data) 120 return &session, nil 121 } 122 123 // cancel the upload session by deleting the temp file at the endpoint. 124 func (u *UploadSession) cancel(auth *graph.Auth) { 125 u.Lock() 126 // small upload sessions will also have an empty UploadURL in addition to 127 // uninitialized large file uploads. 128 nonemptyURL := u.UploadURL != "" 129 u.Unlock() 130 if nonemptyURL { 131 state := u.getState() 132 if state == uploadStarted || state == uploadErrored { 133 // dont care about result, this is purely us being polite to the server 134 go graph.Delete(u.UploadURL, auth) 135 } 136 } 137 } 138 139 // Internal method used for uploading individual chunks of a DriveItem. We have 140 // to make things this way because the internal Put func doesn't work all that 141 // well when we need to add custom headers. Will return without an error if 142 // irrespective of HTTP status (errors are reserved for stuff that prevented 143 // the HTTP request at all). 144 func (u *UploadSession) uploadChunk(auth *graph.Auth, offset uint64) ([]byte, int, error) { 145 u.Lock() 146 url := u.UploadURL 147 if url == "" { 148 u.Unlock() 149 return nil, -1, errors.New("UploadSession UploadURL cannot be empty") 150 } 151 u.Unlock() 152 153 // how much of the file are we going to upload? 
154 end := offset + uploadChunkSize 155 var reqChunkSize uint64 156 if end > u.Size { 157 end = u.Size 158 reqChunkSize = end - offset + 1 159 } 160 if offset > u.Size { 161 return nil, -1, errors.New("offset cannot be larger than DriveItem size") 162 } 163 164 auth.Refresh() 165 166 client := &http.Client{} 167 request, _ := http.NewRequest( 168 "PUT", 169 url, 170 bytes.NewReader((u.Data)[offset:end]), 171 ) 172 // no Authorization header - it will throw a 401 if present 173 request.Header.Add("Content-Length", strconv.Itoa(int(reqChunkSize))) 174 frags := fmt.Sprintf("bytes %d-%d/%d", offset, end-1, u.Size) 175 log.Info().Str("id", u.ID).Msg("Uploading " + frags) 176 request.Header.Add("Content-Range", frags) 177 178 resp, err := client.Do(request) 179 if err != nil { 180 // this is a serious error, not simply one with a non-200 return code 181 return nil, -1, err 182 } 183 defer resp.Body.Close() 184 response, _ := ioutil.ReadAll(resp.Body) 185 return response, resp.StatusCode, nil 186 } 187 188 // Upload copies the file's contents to the server. Should only be called as a 189 // goroutine, or it can potentially block for a very long time. The uploadSession.error 190 // field contains errors to be handled if called as a goroutine. 191 func (u *UploadSession) Upload(auth *graph.Auth) error { 192 log.Info().Str("id", u.ID).Str("name", u.Name).Msg("Uploading file.") 193 u.setState(uploadStarted, nil) 194 195 var uploadPath string 196 var resp []byte 197 if u.Size < uploadLargeSize { 198 // Small upload sessions use a simple PUT request, but this does not support 199 // adding file modification times. We don't really care though, because 200 // after some experimentation, the Microsoft API doesn't seem to properly 201 // support these either (this is why we have to use etags). 
202 if isLocalID(u.ID) { 203 uploadPath = fmt.Sprintf( 204 "/me/drive/items/%s:/%s:/content", 205 url.PathEscape(u.ParentID), 206 url.PathEscape(u.Name), 207 ) 208 } else { 209 uploadPath = fmt.Sprintf( 210 "/me/drive/items/%s/content", 211 url.PathEscape(u.ID), 212 ) 213 } 214 // small files handled in this block 215 var err error 216 resp, err = graph.Put(uploadPath, auth, bytes.NewReader(u.Data)) 217 if err != nil && strings.Contains(err.Error(), "resourceModified") { 218 // retry the request after a second, likely the server is having issues 219 time.Sleep(time.Second) 220 resp, err = graph.Put(uploadPath, auth, bytes.NewReader(u.Data)) 221 } 222 if err != nil { 223 return u.setState(uploadErrored, fmt.Errorf("small upload failed: %w", err)) 224 } 225 } else { 226 if isLocalID(u.ID) { 227 uploadPath = fmt.Sprintf( 228 "/me/drive/items/%s:/%s:/createUploadSession", 229 url.PathEscape(u.ParentID), 230 url.PathEscape(u.Name), 231 ) 232 } else { 233 uploadPath = fmt.Sprintf( 234 "/me/drive/items/%s/createUploadSession", 235 url.PathEscape(u.ID), 236 ) 237 } 238 sessionPostData, _ := json.Marshal(UploadSessionPost{ 239 ConflictBehavior: "replace", 240 FileSystemInfo: FileSystemInfo{ 241 LastModifiedDateTime: u.ModTime, 242 }, 243 }) 244 resp, err := graph.Post(uploadPath, auth, bytes.NewReader(sessionPostData)) 245 if err != nil { 246 return u.setState(uploadErrored, fmt.Errorf("failed to create upload session: %w", err)) 247 } 248 249 // populate UploadURL/expiration - we unmarshal into a fresh session here 250 // just in case the API does something silly at a later date and overwrites 251 // a field it shouldn't. 
252 tmp := UploadSession{} 253 if err = json.Unmarshal(resp, &tmp); err != nil { 254 return u.setState(uploadErrored, 255 fmt.Errorf("could not unmarshal upload session post response: %w", err)) 256 } 257 u.Lock() 258 u.UploadURL = tmp.UploadURL 259 u.ExpirationDateTime = tmp.ExpirationDateTime 260 u.Unlock() 261 262 // api upload session created successfully, now do actual content upload 263 var status int 264 nchunks := int(math.Ceil(float64(u.Size) / float64(uploadChunkSize))) 265 for i := 0; i < nchunks; i++ { 266 resp, status, err = u.uploadChunk(auth, uint64(i)*uploadChunkSize) 267 if err != nil { 268 return u.setState(uploadErrored, fmt.Errorf("failed to perform chunk upload: %w", err)) 269 } 270 271 // retry server-side failures with an exponential back-off strategy. Will not 272 // exit this loop unless it receives a non 5xx error or serious failure 273 for backoff := 1; status >= 500; backoff *= 2 { 274 log.Error(). 275 Str("id", u.ID). 276 Str("name", u.Name). 277 Int("chunk", i). 278 Int("nchunks", nchunks). 279 Int("status", status). 280 Msgf("The OneDrive server is having issues, retrying chunk upload in %ds.", backoff) 281 time.Sleep(time.Duration(backoff) * time.Second) 282 resp, status, err = u.uploadChunk(auth, uint64(i)*uploadChunkSize) 283 if err != nil { // a serious, non 4xx/5xx error 284 return u.setState(uploadErrored, fmt.Errorf("failed to perform chunk upload: %w", err)) 285 } 286 } 287 288 // handle client-side errors 289 if status >= 400 { 290 return u.setState(uploadErrored, fmt.Errorf("error uploading chunk - HTTP %d: %s", status, string(resp))) 291 } 292 } 293 } 294 295 // server has indicated that the upload was successful - now we check to verify the 296 // checksum is what it's supposed to be. 
297 remote := graph.DriveItem{} 298 if err := json.Unmarshal(resp, &remote); err != nil { 299 if len(resp) == 0 { 300 // the API frequently just returns a 0-byte response for completed 301 // multipart uploads, so we manually fetch the newly updated item 302 var remotePtr *graph.DriveItem 303 if isLocalID(u.ID) { 304 remotePtr, err = graph.GetItemChild(u.ParentID, u.Name, auth) 305 } else { 306 remotePtr, err = graph.GetItem(u.ID, auth) 307 } 308 if err == nil { 309 remote = *remotePtr 310 } else { 311 return u.setState(uploadErrored, 312 fmt.Errorf("failed to get item post-upload: %w", err)) 313 } 314 } else { 315 return u.setState(uploadErrored, 316 fmt.Errorf("could not unmarshal response: %w: %s", err, string(resp)), 317 ) 318 } 319 } 320 if remote.File == nil && remote.Size != u.Size { 321 // if we are absolutely pounding the microsoft API, a remote item may sometimes 322 // come back without checksums, so we check the size of the uploaded item instead. 323 return u.setState(uploadErrored, errors.New("size mismatch when remote checksums did not exist")) 324 } else if !remote.VerifyChecksum(u.QuickXORHash) { 325 return u.setState(uploadErrored, errors.New("remote checksum did not match")) 326 } 327 // update the UploadSession's ID in the event that we exchange a local for a remote ID 328 u.Lock() 329 u.ID = remote.ID 330 u.ETag = remote.ETag 331 u.Unlock() 332 return u.setState(uploadComplete, nil) 333 }