tractor.dev/toolkit-go@v0.0.0-20241010005851-214d91207d07/engine/fs/githubfs/githubfs.go (about) 1 package githubfs 2 3 import ( 4 "bytes" 5 "encoding/base64" 6 "encoding/json" 7 "errors" 8 "fmt" 9 "io" 10 "net/http" 11 "os" 12 "path/filepath" 13 "strconv" 14 "strings" 15 "syscall" 16 "time" 17 18 "tractor.dev/toolkit-go/engine/fs" 19 ) 20 21 // TODO: Write requests require a commit message. See if there's a nice way 22 // to expose this to the user instead of a hardcoded message. 23 24 // TODO: Write requests can fail if requests are sent in parallel or too close together. 25 // Automatically stagger write requests to avoid this. 26 27 // Given a GitHub repository and access token, this filesystem will use the 28 // GitHub API to expose a read-write filesystem of the repository contents. 29 // Its root will contain all branches as directories. 30 type FS struct { 31 owner string 32 repo string 33 token string 34 35 branches map[string]Tree 36 branchesExpired bool 37 } 38 39 func New(owner, repoName, accessToken string) *FS { 40 return &FS{ 41 owner: owner, 42 repo: repoName, 43 token: accessToken, 44 branches: make(map[string]Tree), 45 branchesExpired: true, 46 } 47 } 48 49 type Tree struct { 50 Expired bool `json:"-"` 51 52 Sha string `json:"sha"` 53 URL string `json:"url"` 54 Items []TreeItem `json:"tree"` // TODO: use map[Path]TreeItem instead? 55 Truncated bool `json:"truncated"` 56 } 57 type TreeItem struct { 58 Path string `json:"path"` 59 Mode string `json:"mode"` 60 Type string `json:"type"` 61 Size int64 `json:"size"` 62 Sha string `json:"sha"` 63 URL string `json:"url"` 64 } 65 66 func (ti *TreeItem) toFileInfo(branch string) *fileInfo { 67 // TODO: mtime? 68 mode, _ := strconv.ParseUint(ti.Mode, 8, 32) 69 return &fileInfo{ 70 name: filepath.Base(ti.Path), 71 size: ti.Size, 72 isDir: ti.Type == "tree", 73 mode: fs.FileMode(mode), 74 branch: branch, 75 subpath: ti.Path, 76 sha: ti.Sha, 77 } 78 } 79 80 type ErrBadStatus struct { 81 status string 82 } 83 84 func (e ErrBadStatus) Error() string { 85 return "BadStatus: " + e.status 86 } 87 88 func (g *FS) apiRequest(method, url, acceptHeader string, body io.Reader) (*http.Response, error) { 89 req, err := http.NewRequest(method, url, body) 90 if err != nil { 91 return nil, err 92 } 93 req.Header.Add("Accept", acceptHeader) 94 req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", g.token)) 95 req.Header.Add("X-GitHub-Api-Version", "2022-11-28") 96 97 resp, err := http.DefaultClient.Do(req) 98 if err != nil { 99 return nil, err 100 } 101 102 if resp.StatusCode == 401 { 103 return resp, fs.ErrPermission 104 } 105 106 return resp, nil 107 } 108 109 // Every filesystem query is prefixed by a branch name, so `maybeUpdateBranches()` 110 // must be called for every query before accessing it's Tree. `maybeUpdateTree()` 111 // is only necessary when accessing Tree contents. 112 113 // Both in seconds. 114 // Optimize for least amount of Requests without visible loss of sync with remote. 115 const branchesExpiryPeriod = 5 116 const treeExpiryPeriod = 1 117 118 func (g *FS) maybeUpdateBranches() error { 119 if !g.branchesExpired { 120 return nil 121 } 122 123 g.branchesExpired = false 124 defer time.AfterFunc(branchesExpiryPeriod*time.Second, func() { g.branchesExpired = true }) 125 126 resp, err := g.apiRequest( 127 "GET", 128 fmt.Sprintf( 129 "https://api.github.com/repos/%s/%s/branches", 130 g.owner, g.repo, 131 ), 132 "application/vnd.github+json", 133 nil, 134 ) 135 if err != nil { 136 return err 137 } 138 if resp.StatusCode != 200 { 139 return ErrBadStatus{status: resp.Status} 140 } 141 defer resp.Body.Close() 142 143 var branches []struct { 144 Name string `json:"name"` 145 } 146 if err := json.NewDecoder(resp.Body).Decode(&branches); err != nil { 147 return err 148 } 149 150 // TODO: apply diff instead of clearing the whole thing? 151 clear(g.branches) 152 for _, branch := range branches { 153 g.branches[branch.Name] = Tree{Expired: true} 154 } 155 return nil 156 } 157 158 func (g *FS) maybeUpdateTree(branch string) error { 159 existingTree, ok := g.branches[branch] 160 if !ok { 161 return fs.ErrNotExist 162 } 163 164 if !existingTree.Expired { 165 return nil 166 } 167 168 existingTree.Expired = false 169 defer time.AfterFunc(treeExpiryPeriod*time.Second, func() { existingTree.Expired = true }) 170 171 resp, err := g.apiRequest( 172 "GET", 173 fmt.Sprintf( 174 "https://api.github.com/repos/%s/%s/git/trees/%s?recursive=1", 175 g.owner, g.repo, branch, 176 ), 177 "application/vnd.github+json", 178 nil, 179 ) 180 if err != nil { 181 return err 182 } 183 if resp.StatusCode != 200 { 184 return ErrBadStatus{status: resp.Status} 185 } 186 defer resp.Body.Close() 187 188 var newTree Tree 189 if err = json.NewDecoder(resp.Body).Decode(&newTree); err != nil { 190 return err 191 } 192 193 g.branches[branch] = newTree 194 return nil 195 } 196 197 func (g *FS) Chmod(name string, mode fs.FileMode) error { 198 return errors.ErrUnsupported 199 } 200 201 func (g *FS) Chown(name string, uid, gid int) error { 202 return errors.ErrUnsupported 203 } 204 205 func (g *FS) Chtimes(name string, atime time.Time, mtime time.Time) error { 206 return errors.ErrUnsupported 207 } 208 209 func (g *FS) Create(name string) (fs.File, error) { 210 return g.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644) 211 } 212 213 func (g *FS) Mkdir(name string, perm fs.FileMode) error { 214 panic("TODO") 215 } 216 217 func (g *FS) MkdirAll(path string, perm fs.FileMode) error { 218 panic("TODO") 219 } 220 221 func (g *FS) Open(name string) (fs.File, error) { 222 return g.OpenFile(name, os.O_RDONLY, 0) 223 } 224 225 func (g *FS) OpenFile(name string, flag int, perm fs.FileMode) (fs.File, error) { 226 if !fs.ValidPath(name) { 227 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid} 228 } 229 230 // TODO: handle perm, both mode and permissions. 231 232 // Request file in repo at subpath "name" 233 // Read file contents into memory buffer 234 // User can read & modify buffer 235 // Make a update file (PUT) request 236 237 f := file{gfs: g, flags: flag} 238 branch, subpath, hasSubpath := strings.Cut(name, "/") 239 justCreated := false 240 241 { 242 fi, err := g.Stat(name) 243 if err == nil { 244 if flag&(os.O_EXCL|os.O_CREATE) == (os.O_EXCL | os.O_CREATE) { 245 return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrExist} 246 } 247 248 f.fileInfo = *fi.(*fileInfo) 249 250 if fi.IsDir() || !hasSubpath { 251 return &f, nil 252 } 253 } 254 255 if err != nil { 256 if errors.Is(err, fs.ErrNotExist) && flag&os.O_CREATE > 0 { 257 // Defer creation on remote to avoid request conflicts. (See Sync) 258 f.buffer = []byte{} 259 f.dirty = true 260 f.fileInfo = fileInfo{ 261 name: filepath.Base(name), 262 mode: perm, 263 modTime: time.Now().UnixMilli(), 264 branch: branch, 265 subpath: subpath, 266 } 267 268 justCreated = true 269 } else { 270 return nil, &fs.PathError{Op: "open", Path: name, Err: err.(*fs.PathError).Err} 271 } 272 } 273 } 274 275 if flag&os.O_TRUNC > 0 { 276 if !justCreated { 277 f.buffer = []byte{} 278 } 279 f.offset = 0 280 return &f, nil 281 } 282 283 if !justCreated { 284 resp, err := g.apiRequest( 285 "GET", 286 fmt.Sprintf( 287 "https://api.github.com/repos/%s/%s/contents/%s?ref=%s", 288 g.owner, g.repo, subpath, branch, 289 ), 290 "application/vnd.github.raw+json", 291 nil, 292 ) 293 if err != nil { 294 return nil, &fs.PathError{Op: "open", Path: name, Err: err} 295 } 296 if resp.StatusCode != 200 { 297 return nil, &fs.PathError{Op: "open", Path: name, Err: ErrBadStatus{status: resp.Status}} 298 } 299 defer resp.Body.Close() 300 301 f.buffer, err = io.ReadAll(resp.Body) 302 f.fileInfo.size = resp.ContentLength 303 if err != nil { 304 return nil, &fs.PathError{Op: "open", Path: name, Err: err} 305 } 306 } 307 308 if flag&os.O_APPEND > 0 { 309 f.Seek(0, io.SeekEnd) 310 } 311 312 return &f, nil 313 } 314 315 func (g *FS) Remove(name string) error { 316 if !fs.ValidPath(name) { 317 return &fs.PathError{Op: "remove", Path: name, Err: fs.ErrInvalid} 318 } 319 320 fi, err := g.Stat(name) 321 if err != nil { 322 return &fs.PathError{Op: "remove", Path: name, Err: err.(*fs.PathError).Err} 323 } 324 325 if fi.IsDir() { 326 // Use RemoveAll instead 327 return &fs.PathError{Op: "remove", Path: name, Err: errors.ErrUnsupported} 328 } 329 330 fInfo := fi.(*fileInfo) 331 332 resp, err := g.apiRequest( 333 "DELETE", 334 fmt.Sprintf("https://api.github.com/repos/%s/%s/contents/%s", 335 g.owner, g.repo, fInfo.subpath, 336 ), 337 "application/vnd.github+json", 338 bytes.NewBufferString( 339 fmt.Sprintf( 340 `{"message":"Remove '%s'","branch":"%s","sha":"%s"}`, 341 fInfo.subpath, fInfo.branch, fInfo.sha, 342 ), 343 ), 344 ) 345 if err != nil { 346 return &fs.PathError{Op: "remove", Path: name, Err: err} 347 } 348 resp.Body.Close() 349 350 if resp.StatusCode != 200 { 351 return &fs.PathError{Op: "remove", Path: name, Err: ErrBadStatus{status: resp.Status}} 352 } 353 354 // why can't I just update a map value's fields... 355 tree := g.branches[fInfo.branch] 356 tree.Expired = true 357 g.branches[fInfo.branch] = tree 358 359 return nil 360 } 361 362 func (g *FS) RemoveAll(path string) error { 363 // TODO 364 return g.Remove(path) 365 } 366 367 func (g *FS) Rename(oldname, newname string) error { 368 panic("TODO") 369 } 370 371 func (g *FS) Stat(name string) (fs.FileInfo, error) { 372 if !fs.ValidPath(name) { 373 return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrInvalid} 374 } 375 376 if name == "." { 377 return &fileInfo{name: name, size: 0, isDir: true}, nil 378 } 379 380 branch, subpath, hasSubpath := strings.Cut(name, "/") 381 if err := g.maybeUpdateBranches(); err != nil { 382 return nil, &fs.PathError{Op: "stat", Path: name, Err: err} 383 } 384 if !hasSubpath { 385 if _, ok := g.branches[branch]; !ok { 386 return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist} // TODO: add "BranchNotExist" error 387 } 388 return &fileInfo{name: name, size: 0, isDir: true, branch: branch}, nil 389 } 390 if err := g.maybeUpdateTree(branch); err != nil { 391 return nil, &fs.PathError{Op: "stat", Path: name, Err: err} 392 } 393 394 tree, ok := g.branches[branch] 395 if !ok { 396 return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist} // TODO: add "BranchNotExist" error 397 } 398 var item *TreeItem = nil 399 for i := 0; i < len(tree.Items); i++ { 400 if tree.Items[i].Path == subpath { 401 item = &tree.Items[i] 402 break 403 } 404 } 405 406 if item == nil { 407 return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist} 408 } 409 410 return item.toFileInfo(branch), nil 411 } 412 413 type file struct { 414 gfs *FS 415 416 buffer []byte 417 offset int64 418 dirty bool 419 420 flags int 421 fileInfo 422 } 423 424 func (f *file) Read(b []byte) (int, error) { 425 if f.flags&os.O_WRONLY > 0 { 426 return 0, fs.ErrPermission 427 } 428 429 if f.offset >= int64(len(f.buffer)) { 430 return 0, io.EOF 431 } 432 433 var n int 434 rest := f.buffer[f.offset:] 435 if len(rest) < len(b) { 436 n = len(rest) 437 } else { 438 n = len(b) 439 } 440 441 copy(b, rest[:n]) 442 f.offset += int64(n) 443 return n, nil 444 } 445 446 func (f *file) Write(b []byte) (int, error) { 447 if f.flags&os.O_RDONLY > 0 { 448 return 0, fs.ErrPermission 449 } 450 451 writeEnd := f.offset + int64(len(b)) 452 453 if writeEnd > int64(cap(f.buffer)) { 454 var newCapacity int64 455 if cap(f.buffer) == 0 { 456 newCapacity = 8 457 } else { 458 newCapacity = int64(cap(f.buffer)) * 2 459 } 460 461 for ; writeEnd > newCapacity; newCapacity *= 2 { 462 } 463 464 newBuffer := make([]byte, len(f.buffer), newCapacity) 465 copy(newBuffer, f.buffer) 466 f.buffer = newBuffer 467 } 468 469 copy(f.buffer[f.offset:writeEnd], b) 470 if len(f.buffer) < int(writeEnd) { 471 f.buffer = f.buffer[:writeEnd] 472 } 473 f.offset = writeEnd 474 f.dirty = true 475 return len(b), nil 476 } 477 478 func (f *file) Seek(offset int64, whence int) (int64, error) { 479 switch whence { 480 case io.SeekStart: 481 f.offset = offset 482 case io.SeekCurrent: 483 f.offset += offset 484 case io.SeekEnd: 485 f.offset = int64(len(f.buffer)) + offset 486 } 487 if f.offset < 0 { 488 f.offset = 0 489 return 0, fmt.Errorf("%w: resultant offset cannot be negative", fs.ErrInvalid) 490 } 491 return f.offset, nil 492 } 493 494 func (f *file) Sync() error { 495 if !f.dirty { 496 return nil 497 } 498 499 var encodedContent string 500 if len(f.buffer) > 0 { 501 encodedContent = base64.StdEncoding.EncodeToString(f.buffer) 502 } 503 504 const createBody = `{"message":"Create '%s'","branch":"%s","content":"%s"}` 505 const updateBody = `{"message":"Update '%s'","branch":"%s","content":"%s","sha":"%s"}` 506 507 // If f.sha == "" then we must've just created the file locally, 508 // so we want to create it on the remote too. Otherwise update the remote file. 509 // Deferring creation like this avoids 409 Conflict errors. 510 var body *bytes.Buffer 511 if f.sha == "" { 512 body = bytes.NewBufferString(fmt.Sprintf(createBody, f.subpath, f.branch, encodedContent)) 513 } else { 514 body = bytes.NewBufferString(fmt.Sprintf(updateBody, f.subpath, f.branch, encodedContent, f.sha)) 515 } 516 517 resp, err := f.gfs.apiRequest( 518 "PUT", 519 fmt.Sprintf( 520 "https://api.github.com/repos/%s/%s/contents/%s", 521 f.gfs.owner, f.gfs.repo, f.subpath, 522 ), 523 "application/vnd.github+json", 524 body, 525 ) 526 if err != nil { 527 return err 528 } 529 if resp.StatusCode != 200 && resp.StatusCode != 201 { 530 return ErrBadStatus{status: resp.Status} 531 } 532 defer resp.Body.Close() 533 534 var respJson struct { 535 sha string 536 } 537 if err = json.NewDecoder(resp.Body).Decode(&respJson); err != nil { 538 return err 539 } 540 541 f.size = int64(len(f.buffer)) 542 f.fileInfo.modTime = time.Now().Local().UnixMilli() 543 f.fileInfo.sha = respJson.sha 544 f.dirty = false 545 return nil 546 } 547 548 func (f *file) Close() error { 549 return f.Sync() 550 } 551 552 func (f *file) ReadDir(n int) ([]fs.DirEntry, error) { 553 if !f.IsDir() { 554 return nil, syscall.ENOTDIR 555 } 556 557 if f.name == "." { 558 if err := f.gfs.maybeUpdateBranches(); err != nil { 559 return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: err} 560 } 561 var res []fs.DirEntry 562 for branch := range f.gfs.branches { 563 res = append(res, &fileInfo{name: branch, size: 0, isDir: true}) 564 } 565 return res, nil 566 } 567 568 if err := f.gfs.maybeUpdateBranches(); err != nil { 569 return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: err} 570 } 571 if err := f.gfs.maybeUpdateTree(f.branch); err != nil { 572 return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: err} 573 } 574 575 tree, ok := f.gfs.branches[f.branch] 576 if !ok { 577 // TODO: "ErrOutdatedFile"? 578 // Linux allows reads on open file handles that are outdated, maybe we should do the same? 579 // Could embed the TreeItem inside `file`. 580 return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: fs.ErrNotExist} 581 } 582 583 var res []fs.DirEntry 584 for _, item := range tree.Items { 585 after, found := strings.CutPrefix(item.Path, f.subpath) 586 after = strings.TrimLeft(after, "/") 587 // Only get immediate children 588 if found && after != "" && !strings.ContainsRune(after, '/') { 589 res = append(res, item.toFileInfo(f.branch)) 590 } 591 } 592 593 return res, nil 594 } 595 596 func (f *file) Stat() (fs.FileInfo, error) { 597 return &f.fileInfo, nil 598 } 599 600 // Implements the `FileInfo` and `DirEntry` interfaces 601 type fileInfo struct { 602 // Base name 603 name string 604 size int64 605 mode fs.FileMode 606 modTime int64 607 isDir bool 608 609 branch string 610 subpath string 611 sha string 612 } 613 614 func (i *fileInfo) Name() string { return i.name } 615 func (i *fileInfo) Size() int64 { return i.size } 616 func (i *fileInfo) Mode() fs.FileMode { 617 if i.name == i.branch { 618 return 0755 | fs.ModeDir 619 } 620 if i.IsDir() { 621 return i.mode | fs.ModeDir 622 } 623 return i.mode 624 } 625 func (i *fileInfo) ModTime() time.Time { return time.Unix(i.modTime, 0) } 626 func (i *fileInfo) IsDir() bool { return i.isDir } 627 func (i *fileInfo) Sys() any { return nil } 628 629 // These allow it to act as DirEntry as well 630 631 func (i *fileInfo) Info() (fs.FileInfo, error) { return i, nil } 632 func (i *fileInfo) Type() fs.FileMode { return i.Mode() }