tractor.dev/toolkit-go@v0.0.0-20241010005851-214d91207d07/engine/fs/githubfs/githubfs.go (about)

     1  package githubfs
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/base64"
     6  	"encoding/json"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"net/http"
    11  	"os"
    12  	"path/filepath"
    13  	"strconv"
    14  	"strings"
    15  	"syscall"
    16  	"time"
    17  
    18  	"tractor.dev/toolkit-go/engine/fs"
    19  )
    20  
    21  // TODO: Write requests require a commit message. See if there's a nice way
    22  // to expose this to the user instead of a hardcoded message.
    23  
    24  // TODO: Write requests can fail if requests are sent in parallel or too close together.
    25  // Automatically stagger write requests to avoid this.
    26  
// FS is a read-write filesystem over the contents of a single GitHub
// repository, implemented on top of the GitHub REST API and authenticated
// with an access token. Its root contains all branches as directories.
type FS struct {
	owner string // repository owner, interpolated into API URLs
	repo  string // repository name, interpolated into API URLs
	token string // sent as the Bearer token on every API request

	// branches caches one Tree per branch name; branchesExpired reports
	// whether the set of branch names must be re-fetched before use.
	branches        map[string]Tree
	branchesExpired bool
}
    38  
    39  func New(owner, repoName, accessToken string) *FS {
    40  	return &FS{
    41  		owner:           owner,
    42  		repo:            repoName,
    43  		token:           accessToken,
    44  		branches:        make(map[string]Tree),
    45  		branchesExpired: true,
    46  	}
    47  }
    48  
// Tree is the cached listing of one branch, decoded from the response of the
// GitHub "git/trees" endpoint.
type Tree struct {
	// Expired marks the cached listing as stale so that it is re-fetched on
	// next use. It is local bookkeeping and never part of the JSON payload.
	Expired bool `json:"-"`

	Sha       string     `json:"sha"`
	URL       string     `json:"url"`
	Items     []TreeItem `json:"tree"` // TODO: use map[Path]TreeItem instead?
	Truncated bool       `json:"truncated"`
}
// TreeItem is a single entry of a Tree as returned by the GitHub API.
type TreeItem struct {
	Path string `json:"path"` // path relative to the branch root, matched against the queried subpath
	Mode string `json:"mode"` // mode as a string of octal digits
	Type string `json:"type"` // "tree" marks a directory
	Size int64  `json:"size"`
	Sha  string `json:"sha"`
	URL  string `json:"url"`
}
    65  
    66  func (ti *TreeItem) toFileInfo(branch string) *fileInfo {
    67  	// TODO: mtime?
    68  	mode, _ := strconv.ParseUint(ti.Mode, 8, 32)
    69  	return &fileInfo{
    70  		name:    filepath.Base(ti.Path),
    71  		size:    ti.Size,
    72  		isDir:   ti.Type == "tree",
    73  		mode:    fs.FileMode(mode),
    74  		branch:  branch,
    75  		subpath: ti.Path,
    76  		sha:     ti.Sha,
    77  	}
    78  }
    79  
    80  type ErrBadStatus struct {
    81  	status string
    82  }
    83  
    84  func (e ErrBadStatus) Error() string {
    85  	return "BadStatus: " + e.status
    86  }
    87  
    88  func (g *FS) apiRequest(method, url, acceptHeader string, body io.Reader) (*http.Response, error) {
    89  	req, err := http.NewRequest(method, url, body)
    90  	if err != nil {
    91  		return nil, err
    92  	}
    93  	req.Header.Add("Accept", acceptHeader)
    94  	req.Header.Add("Authorization", fmt.Sprintf("Bearer %s", g.token))
    95  	req.Header.Add("X-GitHub-Api-Version", "2022-11-28")
    96  
    97  	resp, err := http.DefaultClient.Do(req)
    98  	if err != nil {
    99  		return nil, err
   100  	}
   101  
   102  	if resp.StatusCode == 401 {
   103  		return resp, fs.ErrPermission
   104  	}
   105  
   106  	return resp, nil
   107  }
   108  
// Every filesystem query is prefixed by a branch name, so `maybeUpdateBranches()`
// must be called for every query before accessing its Tree. `maybeUpdateTree()`
// is only necessary when accessing Tree contents.
   112  
// Cache lifetimes, both in seconds.
// Chosen to keep the number of API requests low without the cache visibly
// falling out of sync with the remote.
const branchesExpiryPeriod = 5
const treeExpiryPeriod = 1
   117  
   118  func (g *FS) maybeUpdateBranches() error {
   119  	if !g.branchesExpired {
   120  		return nil
   121  	}
   122  
   123  	g.branchesExpired = false
   124  	defer time.AfterFunc(branchesExpiryPeriod*time.Second, func() { g.branchesExpired = true })
   125  
   126  	resp, err := g.apiRequest(
   127  		"GET",
   128  		fmt.Sprintf(
   129  			"https://api.github.com/repos/%s/%s/branches",
   130  			g.owner, g.repo,
   131  		),
   132  		"application/vnd.github+json",
   133  		nil,
   134  	)
   135  	if err != nil {
   136  		return err
   137  	}
   138  	if resp.StatusCode != 200 {
   139  		return ErrBadStatus{status: resp.Status}
   140  	}
   141  	defer resp.Body.Close()
   142  
   143  	var branches []struct {
   144  		Name string `json:"name"`
   145  	}
   146  	if err := json.NewDecoder(resp.Body).Decode(&branches); err != nil {
   147  		return err
   148  	}
   149  
   150  	// TODO: apply diff instead of clearing the whole thing?
   151  	clear(g.branches)
   152  	for _, branch := range branches {
   153  		g.branches[branch.Name] = Tree{Expired: true}
   154  	}
   155  	return nil
   156  }
   157  
   158  func (g *FS) maybeUpdateTree(branch string) error {
   159  	existingTree, ok := g.branches[branch]
   160  	if !ok {
   161  		return fs.ErrNotExist
   162  	}
   163  
   164  	if !existingTree.Expired {
   165  		return nil
   166  	}
   167  
   168  	existingTree.Expired = false
   169  	defer time.AfterFunc(treeExpiryPeriod*time.Second, func() { existingTree.Expired = true })
   170  
   171  	resp, err := g.apiRequest(
   172  		"GET",
   173  		fmt.Sprintf(
   174  			"https://api.github.com/repos/%s/%s/git/trees/%s?recursive=1",
   175  			g.owner, g.repo, branch,
   176  		),
   177  		"application/vnd.github+json",
   178  		nil,
   179  	)
   180  	if err != nil {
   181  		return err
   182  	}
   183  	if resp.StatusCode != 200 {
   184  		return ErrBadStatus{status: resp.Status}
   185  	}
   186  	defer resp.Body.Close()
   187  
   188  	var newTree Tree
   189  	if err = json.NewDecoder(resp.Body).Decode(&newTree); err != nil {
   190  		return err
   191  	}
   192  
   193  	g.branches[branch] = newTree
   194  	return nil
   195  }
   196  
// Chmod is not supported by this filesystem; it always returns
// errors.ErrUnsupported.
func (g *FS) Chmod(name string, mode fs.FileMode) error {
	return errors.ErrUnsupported
}
   200  
// Chown is not supported by this filesystem; it always returns
// errors.ErrUnsupported.
func (g *FS) Chown(name string, uid, gid int) error {
	return errors.ErrUnsupported
}
   204  
// Chtimes is not supported by this filesystem; it always returns
// errors.ErrUnsupported.
func (g *FS) Chtimes(name string, atime time.Time, mtime time.Time) error {
	return errors.ErrUnsupported
}
   208  
// Create opens name for writing by delegating to OpenFile with
// O_CREATE|O_WRONLY|O_TRUNC and permission 0644.
func (g *FS) Create(name string) (fs.File, error) {
	return g.OpenFile(name, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
}
   212  
   213  func (g *FS) Mkdir(name string, perm fs.FileMode) error {
   214  	panic("TODO")
   215  }
   216  
   217  func (g *FS) MkdirAll(path string, perm fs.FileMode) error {
   218  	panic("TODO")
   219  }
   220  
// Open opens name read-only by delegating to OpenFile with O_RDONLY.
func (g *FS) Open(name string) (fs.File, error) {
	return g.OpenFile(name, os.O_RDONLY, 0)
}
   224  
   225  func (g *FS) OpenFile(name string, flag int, perm fs.FileMode) (fs.File, error) {
   226  	if !fs.ValidPath(name) {
   227  		return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrInvalid}
   228  	}
   229  
   230  	// TODO: handle perm, both mode and permissions.
   231  
   232  	// Request file in repo at subpath "name"
   233  	// Read file contents into memory buffer
   234  	// User can read & modify buffer
   235  	// Make a update file (PUT) request
   236  
   237  	f := file{gfs: g, flags: flag}
   238  	branch, subpath, hasSubpath := strings.Cut(name, "/")
   239  	justCreated := false
   240  
   241  	{
   242  		fi, err := g.Stat(name)
   243  		if err == nil {
   244  			if flag&(os.O_EXCL|os.O_CREATE) == (os.O_EXCL | os.O_CREATE) {
   245  				return nil, &fs.PathError{Op: "open", Path: name, Err: fs.ErrExist}
   246  			}
   247  
   248  			f.fileInfo = *fi.(*fileInfo)
   249  
   250  			if fi.IsDir() || !hasSubpath {
   251  				return &f, nil
   252  			}
   253  		}
   254  
   255  		if err != nil {
   256  			if errors.Is(err, fs.ErrNotExist) && flag&os.O_CREATE > 0 {
   257  				// Defer creation on remote to avoid request conflicts. (See Sync)
   258  				f.buffer = []byte{}
   259  				f.dirty = true
   260  				f.fileInfo = fileInfo{
   261  					name:    filepath.Base(name),
   262  					mode:    perm,
   263  					modTime: time.Now().UnixMilli(),
   264  					branch:  branch,
   265  					subpath: subpath,
   266  				}
   267  
   268  				justCreated = true
   269  			} else {
   270  				return nil, &fs.PathError{Op: "open", Path: name, Err: err.(*fs.PathError).Err}
   271  			}
   272  		}
   273  	}
   274  
   275  	if flag&os.O_TRUNC > 0 {
   276  		if !justCreated {
   277  			f.buffer = []byte{}
   278  		}
   279  		f.offset = 0
   280  		return &f, nil
   281  	}
   282  
   283  	if !justCreated {
   284  		resp, err := g.apiRequest(
   285  			"GET",
   286  			fmt.Sprintf(
   287  				"https://api.github.com/repos/%s/%s/contents/%s?ref=%s",
   288  				g.owner, g.repo, subpath, branch,
   289  			),
   290  			"application/vnd.github.raw+json",
   291  			nil,
   292  		)
   293  		if err != nil {
   294  			return nil, &fs.PathError{Op: "open", Path: name, Err: err}
   295  		}
   296  		if resp.StatusCode != 200 {
   297  			return nil, &fs.PathError{Op: "open", Path: name, Err: ErrBadStatus{status: resp.Status}}
   298  		}
   299  		defer resp.Body.Close()
   300  
   301  		f.buffer, err = io.ReadAll(resp.Body)
   302  		f.fileInfo.size = resp.ContentLength
   303  		if err != nil {
   304  			return nil, &fs.PathError{Op: "open", Path: name, Err: err}
   305  		}
   306  	}
   307  
   308  	if flag&os.O_APPEND > 0 {
   309  		f.Seek(0, io.SeekEnd)
   310  	}
   311  
   312  	return &f, nil
   313  }
   314  
   315  func (g *FS) Remove(name string) error {
   316  	if !fs.ValidPath(name) {
   317  		return &fs.PathError{Op: "remove", Path: name, Err: fs.ErrInvalid}
   318  	}
   319  
   320  	fi, err := g.Stat(name)
   321  	if err != nil {
   322  		return &fs.PathError{Op: "remove", Path: name, Err: err.(*fs.PathError).Err}
   323  	}
   324  
   325  	if fi.IsDir() {
   326  		// Use RemoveAll instead
   327  		return &fs.PathError{Op: "remove", Path: name, Err: errors.ErrUnsupported}
   328  	}
   329  
   330  	fInfo := fi.(*fileInfo)
   331  
   332  	resp, err := g.apiRequest(
   333  		"DELETE",
   334  		fmt.Sprintf("https://api.github.com/repos/%s/%s/contents/%s",
   335  			g.owner, g.repo, fInfo.subpath,
   336  		),
   337  		"application/vnd.github+json",
   338  		bytes.NewBufferString(
   339  			fmt.Sprintf(
   340  				`{"message":"Remove '%s'","branch":"%s","sha":"%s"}`,
   341  				fInfo.subpath, fInfo.branch, fInfo.sha,
   342  			),
   343  		),
   344  	)
   345  	if err != nil {
   346  		return &fs.PathError{Op: "remove", Path: name, Err: err}
   347  	}
   348  	resp.Body.Close()
   349  
   350  	if resp.StatusCode != 200 {
   351  		return &fs.PathError{Op: "remove", Path: name, Err: ErrBadStatus{status: resp.Status}}
   352  	}
   353  
   354  	// why can't I just update a map value's fields...
   355  	tree := g.branches[fInfo.branch]
   356  	tree.Expired = true
   357  	g.branches[fInfo.branch] = tree
   358  
   359  	return nil
   360  }
   361  
   362  func (g *FS) RemoveAll(path string) error {
   363  	// TODO
   364  	return g.Remove(path)
   365  }
   366  
   367  func (g *FS) Rename(oldname, newname string) error {
   368  	panic("TODO")
   369  }
   370  
   371  func (g *FS) Stat(name string) (fs.FileInfo, error) {
   372  	if !fs.ValidPath(name) {
   373  		return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrInvalid}
   374  	}
   375  
   376  	if name == "." {
   377  		return &fileInfo{name: name, size: 0, isDir: true}, nil
   378  	}
   379  
   380  	branch, subpath, hasSubpath := strings.Cut(name, "/")
   381  	if err := g.maybeUpdateBranches(); err != nil {
   382  		return nil, &fs.PathError{Op: "stat", Path: name, Err: err}
   383  	}
   384  	if !hasSubpath {
   385  		if _, ok := g.branches[branch]; !ok {
   386  			return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist} // TODO: add "BranchNotExist" error
   387  		}
   388  		return &fileInfo{name: name, size: 0, isDir: true, branch: branch}, nil
   389  	}
   390  	if err := g.maybeUpdateTree(branch); err != nil {
   391  		return nil, &fs.PathError{Op: "stat", Path: name, Err: err}
   392  	}
   393  
   394  	tree, ok := g.branches[branch]
   395  	if !ok {
   396  		return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist} // TODO: add "BranchNotExist" error
   397  	}
   398  	var item *TreeItem = nil
   399  	for i := 0; i < len(tree.Items); i++ {
   400  		if tree.Items[i].Path == subpath {
   401  			item = &tree.Items[i]
   402  			break
   403  		}
   404  	}
   405  
   406  	if item == nil {
   407  		return nil, &fs.PathError{Op: "stat", Path: name, Err: fs.ErrNotExist}
   408  	}
   409  
   410  	return item.toFileInfo(branch), nil
   411  }
   412  
// file is an open handle on a file or directory of an FS. File content lives
// entirely in memory and is uploaded to the remote by Sync.
type file struct {
	gfs *FS // filesystem this handle belongs to

	buffer []byte // in-memory file content
	offset int64  // current read/write position within buffer
	dirty  bool   // set when buffer has changes that Sync has not uploaded yet

	flags int // flags the file was opened with (os.O_* values)
	fileInfo
}
   423  
   424  func (f *file) Read(b []byte) (int, error) {
   425  	if f.flags&os.O_WRONLY > 0 {
   426  		return 0, fs.ErrPermission
   427  	}
   428  
   429  	if f.offset >= int64(len(f.buffer)) {
   430  		return 0, io.EOF
   431  	}
   432  
   433  	var n int
   434  	rest := f.buffer[f.offset:]
   435  	if len(rest) < len(b) {
   436  		n = len(rest)
   437  	} else {
   438  		n = len(b)
   439  	}
   440  
   441  	copy(b, rest[:n])
   442  	f.offset += int64(n)
   443  	return n, nil
   444  }
   445  
   446  func (f *file) Write(b []byte) (int, error) {
   447  	if f.flags&os.O_RDONLY > 0 {
   448  		return 0, fs.ErrPermission
   449  	}
   450  
   451  	writeEnd := f.offset + int64(len(b))
   452  
   453  	if writeEnd > int64(cap(f.buffer)) {
   454  		var newCapacity int64
   455  		if cap(f.buffer) == 0 {
   456  			newCapacity = 8
   457  		} else {
   458  			newCapacity = int64(cap(f.buffer)) * 2
   459  		}
   460  
   461  		for ; writeEnd > newCapacity; newCapacity *= 2 {
   462  		}
   463  
   464  		newBuffer := make([]byte, len(f.buffer), newCapacity)
   465  		copy(newBuffer, f.buffer)
   466  		f.buffer = newBuffer
   467  	}
   468  
   469  	copy(f.buffer[f.offset:writeEnd], b)
   470  	if len(f.buffer) < int(writeEnd) {
   471  		f.buffer = f.buffer[:writeEnd]
   472  	}
   473  	f.offset = writeEnd
   474  	f.dirty = true
   475  	return len(b), nil
   476  }
   477  
   478  func (f *file) Seek(offset int64, whence int) (int64, error) {
   479  	switch whence {
   480  	case io.SeekStart:
   481  		f.offset = offset
   482  	case io.SeekCurrent:
   483  		f.offset += offset
   484  	case io.SeekEnd:
   485  		f.offset = int64(len(f.buffer)) + offset
   486  	}
   487  	if f.offset < 0 {
   488  		f.offset = 0
   489  		return 0, fmt.Errorf("%w: resultant offset cannot be negative", fs.ErrInvalid)
   490  	}
   491  	return f.offset, nil
   492  }
   493  
   494  func (f *file) Sync() error {
   495  	if !f.dirty {
   496  		return nil
   497  	}
   498  
   499  	var encodedContent string
   500  	if len(f.buffer) > 0 {
   501  		encodedContent = base64.StdEncoding.EncodeToString(f.buffer)
   502  	}
   503  
   504  	const createBody = `{"message":"Create '%s'","branch":"%s","content":"%s"}`
   505  	const updateBody = `{"message":"Update '%s'","branch":"%s","content":"%s","sha":"%s"}`
   506  
   507  	// If f.sha == "" then we must've just created the file locally,
   508  	// so we want to create it on the remote too. Otherwise update the remote file.
   509  	// Deferring creation like this avoids 409 Conflict errors.
   510  	var body *bytes.Buffer
   511  	if f.sha == "" {
   512  		body = bytes.NewBufferString(fmt.Sprintf(createBody, f.subpath, f.branch, encodedContent))
   513  	} else {
   514  		body = bytes.NewBufferString(fmt.Sprintf(updateBody, f.subpath, f.branch, encodedContent, f.sha))
   515  	}
   516  
   517  	resp, err := f.gfs.apiRequest(
   518  		"PUT",
   519  		fmt.Sprintf(
   520  			"https://api.github.com/repos/%s/%s/contents/%s",
   521  			f.gfs.owner, f.gfs.repo, f.subpath,
   522  		),
   523  		"application/vnd.github+json",
   524  		body,
   525  	)
   526  	if err != nil {
   527  		return err
   528  	}
   529  	if resp.StatusCode != 200 && resp.StatusCode != 201 {
   530  		return ErrBadStatus{status: resp.Status}
   531  	}
   532  	defer resp.Body.Close()
   533  
   534  	var respJson struct {
   535  		sha string
   536  	}
   537  	if err = json.NewDecoder(resp.Body).Decode(&respJson); err != nil {
   538  		return err
   539  	}
   540  
   541  	f.size = int64(len(f.buffer))
   542  	f.fileInfo.modTime = time.Now().Local().UnixMilli()
   543  	f.fileInfo.sha = respJson.sha
   544  	f.dirty = false
   545  	return nil
   546  }
   547  
// Close flushes pending changes by calling Sync and returns its error.
func (f *file) Close() error {
	return f.Sync()
}
   551  
   552  func (f *file) ReadDir(n int) ([]fs.DirEntry, error) {
   553  	if !f.IsDir() {
   554  		return nil, syscall.ENOTDIR
   555  	}
   556  
   557  	if f.name == "." {
   558  		if err := f.gfs.maybeUpdateBranches(); err != nil {
   559  			return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: err}
   560  		}
   561  		var res []fs.DirEntry
   562  		for branch := range f.gfs.branches {
   563  			res = append(res, &fileInfo{name: branch, size: 0, isDir: true})
   564  		}
   565  		return res, nil
   566  	}
   567  
   568  	if err := f.gfs.maybeUpdateBranches(); err != nil {
   569  		return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: err}
   570  	}
   571  	if err := f.gfs.maybeUpdateTree(f.branch); err != nil {
   572  		return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: err}
   573  	}
   574  
   575  	tree, ok := f.gfs.branches[f.branch]
   576  	if !ok {
   577  		// TODO: "ErrOutdatedFile"?
   578  		// Linux allows reads on open file handles that are outdated, maybe we should do the same?
   579  		// Could embed the TreeItem inside `file`.
   580  		return nil, &fs.PathError{Op: "readdir", Path: f.name, Err: fs.ErrNotExist}
   581  	}
   582  
   583  	var res []fs.DirEntry
   584  	for _, item := range tree.Items {
   585  		after, found := strings.CutPrefix(item.Path, f.subpath)
   586  		after = strings.TrimLeft(after, "/")
   587  		// Only get immediate children
   588  		if found && after != "" && !strings.ContainsRune(after, '/') {
   589  			res = append(res, item.toFileInfo(f.branch))
   590  		}
   591  	}
   592  
   593  	return res, nil
   594  }
   595  
// Stat returns the handle's own fileInfo without contacting the remote.
func (f *file) Stat() (fs.FileInfo, error) {
	return &f.fileInfo, nil
}
   599  
// fileInfo describes a file, directory or branch of the repository.
// It implements the `FileInfo` and `DirEntry` interfaces.
type fileInfo struct {
	// Base name
	name    string
	size    int64
	mode    fs.FileMode
	modTime int64 // written as Unix milliseconds by OpenFile and Sync
	isDir   bool

	branch  string // branch the entry belongs to; empty for the root
	subpath string // path relative to the branch root; empty for the root and for branches
	sha     string // sha reported by the API; empty for files not yet created on the remote
}
   613  
// Name returns the base name of the entry and Size its recorded size.
func (i *fileInfo) Name() string { return i.name }
func (i *fileInfo) Size() int64  { return i.size }
   616  func (i *fileInfo) Mode() fs.FileMode {
   617  	if i.name == i.branch {
   618  		return 0755 | fs.ModeDir
   619  	}
   620  	if i.IsDir() {
   621  		return i.mode | fs.ModeDir
   622  	}
   623  	return i.mode
   624  }
   625  func (i *fileInfo) ModTime() time.Time { return time.Unix(i.modTime, 0) }
   626  func (i *fileInfo) IsDir() bool        { return i.isDir }
   627  func (i *fileInfo) Sys() any           { return nil }
   628  
   629  // These allow it to act as DirEntry as well
   630  
// Info returns the fileInfo itself; Type returns the same value as Mode.
// NOTE(review): Type returns the full mode including permission bits, not
// only the type bits — confirm whether callers expect type bits only.
func (i *fileInfo) Info() (fs.FileInfo, error) { return i, nil }
func (i *fileInfo) Type() fs.FileMode          { return i.Mode() }