kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/tools/kythefs/kythefs.go (about)

     1  /*
     2   * Copyright 2019 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Binary KytheFS exposes file content stored in Kythe as a virtual filesystem.
    18  //
    19  // Example usage:
    20  //
    21  //	bazel build kythe/go/serving/tools/kythefs
    22  //	# Blocks until unmounted:
    23  //	./bazel-bin/kythe/go/serving/tools/kythefs/kythefs --mountpoint vfs_dir
    24  //
    25  //	# To unmount:
    26  //	fusermount -u vfs_dir
    27  package main
    28  
    29  import (
    30  	"context"
    31  	"flag"
    32  	"fmt"
    33  	"os"
    34  	"path/filepath"
    35  	"strings"
    36  
    37  	"kythe.io/kythe/go/serving/api"
    38  	"kythe.io/kythe/go/util/flagutil"
    39  	"kythe.io/kythe/go/util/kytheuri"
    40  	"kythe.io/kythe/go/util/log"
    41  	"kythe.io/kythe/go/util/schema/facts"
    42  
    43  	"github.com/hanwen/go-fuse/fuse"
    44  	"github.com/hanwen/go-fuse/fuse/nodefs"
    45  	"github.com/hanwen/go-fuse/fuse/pathfs"
    46  
    47  	ftpb "kythe.io/kythe/proto/filetree_go_proto"
    48  	gpb "kythe.io/kythe/proto/graph_go_proto"
    49  	xpb "kythe.io/kythe/proto/xref_go_proto"
    50  )
    51  
    52  var (
    53  	serverAddr = flag.String("server", "http://localhost:8080",
    54  		"The address of the Kythe service to use. For example http://localhost:8080.")
    55  	mountPoint = flag.String("mountpoint", "",
    56  		"Path to existing directory to mount KytheFS at.")
    57  )
    58  
    59  func init() {
    60  	flag.Usage = flagutil.SimpleUsage("Mounts file content stored in Kythe as a virtual filesystem.",
    61  		"The files are laid out on a path <corpus>/<root>/<path>.",
    62  		"(--mountpoint MOUNT_PATH)",
    63  		"[--server SERVER_ADDRESS]")
    64  }
    65  
// kytheFS is a path-based FUSE filesystem whose directory structure and file
// contents are obtained from a Kythe API endpoint.
//
// It embeds pathfs.FileSystem, so any filesystem operation that is not
// overridden by a method in this file is handled by the embedded value.
type kytheFS struct {
	pathfs.FileSystem
	// Context is the context passed to every call made through API.
	Context context.Context
	// API is the Kythe service queried for corpus roots, nodes, directory
	// listings and source text.
	API     api.Interface

	// WarnedEmptyCorpus records that the empty-corpus-name warning has already
	// been logged, so it is emitted only once.
	WarnedEmptyCorpus       bool
	// WarnedOverlappingPrefix records that the overlapping corpus+root warning
	// has already been logged, so it is emitted only once.
	// NOTE(review): both flags are read and written without synchronization —
	// confirm whether filesystem callbacks can run concurrently.
	WarnedOverlappingPrefix bool
}
    74  
// A FilepathResolution is the result of mapping a vfs path into either a
// Kythe URI path component or a proper Kythe URI. The mapping happens in the
// context of a given corpus+root.
//
// Only one of the fields has a non-default value, depending on the resolution.
type FilepathResolution struct {
	// Present if the filepath was resolved to a Kythe URI.
	//
	// For example, given corpus "foo" and root "bar/baz", KytheURI will resolve
	// as follows:
	//
	//     filepath to resolve | KytheURI
	//     "foo/bar/baz"       | "kythe://foo?root=bar/baz?path="
	//     "foo/bar/baz/quux"  | "kythe://foo?root=bar/baz?path=quux"
	//
	KytheURI *kytheuri.URI

	// Present if the filepath is a prefix of a given corpus+root.
	// Holds the next filepath component on the corpus+root filepath.
	//
	// For example, given corpus "foo" and root "bar/baz", NextDirComponent will
	// resolve as follows:
	//
	//     filepath to resolve | NextDirComponent
	//     ""                  | "foo"
	//     "foo"               | "bar"
	//     "foo/bar"           | "baz"
	//
	NextDirComponent string
}
   105  
   106  // hasDirComponent returns true if 'rs' contains a 'NextDirComponent' resolution.
   107  func hasDirComponent(rs []FilepathResolution) bool {
   108  	for _, r := range rs {
   109  		if r.NextDirComponent != "" {
   110  			return true
   111  		}
   112  	}
   113  	return false
   114  }
   115  
   116  // ResolveFilepath returns alternative resolutions of a given vfs path.
   117  // The multiple resolutions are due to ambiguity, which occurs due to:
   118  //
   119  //	a) The queried path pointing into a prefix of some corpus+root vfs path.
   120  //
   121  //	b) Overlapping corpus+root+path vfs paths. If happens, you likely need to
   122  //	   adjust the extractor's vname mapping config.
   123  func (me *kytheFS) ResolveFilepath(path string) ([]FilepathResolution, error) {
   124  	var req ftpb.CorpusRootsRequest
   125  	cr, err := me.API.CorpusRoots(me.Context, &req)
   126  	if err != nil {
   127  		return nil, err
   128  	}
   129  
   130  	// Set to dedup next-dirs, as they can be common against multiple corpus+root
   131  	// pairs.
   132  	nextDirs := make(map[string]bool)
   133  
   134  	var ticketResolution *FilepathResolution
   135  
   136  	sep := string(os.PathSeparator)
   137  
   138  	if path == "" {
   139  		// List top-level vfs dirs, which are the first components of corpus names.
   140  		for _, corpus := range me.NonEmptyCorpora(cr.Corpus) {
   141  			parts := strings.SplitN(corpus.Name, sep, 2)
   142  			nextDirs[parts[0]] = true
   143  		}
   144  	} else {
   145  		// Given a vfs directory path, collect the listings visible in that
   146  		// directory. These are the following directory components and maybe a
   147  		// resolved/ Kythe URI of a matching file.
   148  
   149  		// If a path could resolve to multiple URIs, due to an overlap in
   150  		// corpus+root+path_prefix, we arbitrary prefer the one matching the one
   151  		// with the longer corpus+root.
   152  		//
   153  		// We could alternatively check existence of the path with API calls, but
   154  		// this situation shouldn't normally arise unless extraction config is broken.
   155  		var longestCorpusRootForTicket string
   156  
   157  		for _, corpus := range me.NonEmptyCorpora(cr.Corpus) {
   158  			for _, root := range corpus.Root {
   159  				crPath := filepath.Join(corpus.Name, root)
   160  				crRemain := strings.TrimPrefix(crPath, path)
   161  				vfsRemain := strings.TrimPrefix(path, crPath)
   162  				if len(vfsRemain) < len(path) &&
   163  					(vfsRemain == "" || vfsRemain[:1] == sep) &&
   164  					len(crPath) > len(longestCorpusRootForTicket) {
   165  
   166  					if longestCorpusRootForTicket != "" && !me.WarnedOverlappingPrefix {
   167  						log.Warningf("There is at least one overlap in corpus+root+path_prefix. "+
   168  							"Path: %q (corpus+root %q), corpus+root of conflicting path: %q ",
   169  							path, crPath, longestCorpusRootForTicket)
   170  						me.WarnedOverlappingPrefix = true
   171  					}
   172  
   173  					longestCorpusRootForTicket = crPath
   174  					// Points inside corpus+root, match using the ticket.
   175  					var p string // Exact corpus+root match.
   176  					if vfsRemain != "" {
   177  						p = vfsRemain[1:] // Additional kythe path.
   178  					}
   179  					ticketResolution = &FilepathResolution{
   180  						KytheURI: &kytheuri.URI{
   181  							Corpus: corpus.Name,
   182  							Root:   root,
   183  							Path:   p,
   184  						},
   185  					}
   186  				} else if len(crRemain) < len(crPath) && crRemain[:1] == sep {
   187  					// Queried path is a proper prefix.
   188  					parts := strings.SplitN(crRemain[1:], sep, 2)
   189  					nextDirs[parts[0]] = true
   190  				}
   191  			}
   192  		}
   193  	}
   194  
   195  	var results []FilepathResolution
   196  	for k := range nextDirs {
   197  		res := FilepathResolution{NextDirComponent: k}
   198  		results = append(results, res)
   199  	}
   200  	if ticketResolution != nil {
   201  		results = append(results, *ticketResolution)
   202  	}
   203  	return results, nil
   204  }
   205  
   206  // NonEmptyCorpora returns the non-empty named corpuses, and warns the first time
   207  // an empty corpus name is encountered.
   208  //
   209  // Empty corpus names are not worth the trouble for special handling, given
   210  // that naming corpora comes without drawbacks and is a good practice.
   211  func (me *kytheFS) NonEmptyCorpora(cs []*ftpb.CorpusRootsReply_Corpus) []*ftpb.CorpusRootsReply_Corpus {
   212  	var res []*ftpb.CorpusRootsReply_Corpus
   213  	for _, c := range cs {
   214  		if c.Name != "" {
   215  			res = append(res, c)
   216  		} else if !me.WarnedEmptyCorpus {
   217  			log.Warningf("found empty corpus name, skipping mapping! " +
   218  				"Please set a corpus when extracting or indexing.")
   219  			me.WarnedEmptyCorpus = true
   220  		}
   221  	}
   222  	return res
   223  }
   224  
   225  // IsDirectory returns true if the given Kythe URI corresponds to a directory.
   226  //
   227  // Actually it checks that the path is not a known file. Could also use the
   228  // filetree api to check contents of the parent. But I expect this code to
   229  // change when caching is added, then we will determine directory-ness from
   230  // a local cache (and it will be a separate concern how we fill that cache).
   231  func (me *kytheFS) IsDirectory(uri *kytheuri.URI) (bool, error) {
   232  	ticket := uri.String()
   233  	req := &gpb.NodesRequest{
   234  		Ticket: []string{ticket},
   235  		// Minimize amount of data returned, ask for NodeKind only.
   236  		Filter: []string{facts.NodeKind},
   237  	}
   238  	res, err := me.API.Nodes(me.Context, req)
   239  	if err != nil {
   240  		return false, nil
   241  	}
   242  	for k, n := range res.Nodes {
   243  		if k != ticket {
   244  			continue
   245  		}
   246  		if len(n.Facts) > 0 {
   247  			// Directory entries don't have any facts.
   248  			return false, nil
   249  		}
   250  	}
   251  	return true, nil
   252  }
   253  
   254  func (me *kytheFS) fetchSourceForURI(uri *kytheuri.URI) ([]byte, error) {
   255  	ticket := uri.String()
   256  	dec, err := me.API.Decorations(me.Context, &xpb.DecorationsRequest{
   257  		Location:   &xpb.Location{Ticket: ticket},
   258  		SourceText: true,
   259  	})
   260  	if err != nil {
   261  		return nil, err
   262  	}
   263  	return dec.SourceText, nil
   264  }
   265  
   266  func (me *kytheFS) fetchSource(path string) ([]byte, error) {
   267  	resolutions, err := me.ResolveFilepath(path)
   268  	if err != nil {
   269  		return nil, err
   270  	}
   271  
   272  	for _, r := range resolutions {
   273  		if r.KytheURI == nil {
   274  			continue
   275  		}
   276  		src, err := me.fetchSourceForURI(r.KytheURI)
   277  		if err != nil {
   278  			return nil, fmt.Errorf(
   279  				"no xrefs for %q (resolved to ticket %q): %v",
   280  				path, r.KytheURI.String(), err)
   281  		}
   282  
   283  		return src, nil
   284  	}
   285  
   286  	return nil, fmt.Errorf("couldn't resolve path %q to a ticket", path)
   287  }
   288  
   289  // GetAttr implements a go-fuse stub.
   290  func (me *kytheFS) GetAttr(path string, context *fuse.Context) (*fuse.Attr, fuse.Status) {
   291  	resolutions, err := me.ResolveFilepath(path)
   292  	if err != nil {
   293  		log.Errorf("resolution error for %q: %v", path, err)
   294  		return nil, fuse.ENOENT
   295  	}
   296  
   297  	if hasDirComponent(resolutions) {
   298  		return &fuse.Attr{
   299  			Mode: fuse.S_IFDIR | 0755,
   300  		}, fuse.OK
   301  	}
   302  
   303  	for _, r := range resolutions {
   304  		if r.KytheURI == nil {
   305  			continue
   306  		}
   307  
   308  		isDir, err := me.IsDirectory(r.KytheURI)
   309  		if err != nil {
   310  			return nil, fuse.ENOENT
   311  		}
   312  
   313  		if isDir {
   314  			return &fuse.Attr{
   315  				Mode: fuse.S_IFDIR | 0755,
   316  			}, fuse.OK
   317  		}
   318  
   319  		src, err := me.fetchSourceForURI(r.KytheURI)
   320  		if err != nil {
   321  			return nil, fuse.ENOENT
   322  		}
   323  		return &fuse.Attr{
   324  			Mode: fuse.S_IFREG | 0644, Size: uint64(len(src)),
   325  		}, fuse.OK
   326  	}
   327  	return nil, fuse.ENOENT
   328  }
   329  
   330  // OpenDir implements a go-fuse stub.
   331  func (me *kytheFS) OpenDir(path string, context *fuse.Context) (c []fuse.DirEntry, code fuse.Status) {
   332  	resolutions, err := me.ResolveFilepath(path)
   333  	if err != nil {
   334  		log.Errorf("resolution error for %q: %v", path, err)
   335  		return nil, fuse.ENOENT
   336  	}
   337  
   338  	// Key by path component, since when a corpus+root segment overlaps with
   339  	// an actual deep path from an other corpus+root, we could get duplicate
   340  	// components otherwise.
   341  	ents := make(map[string]fuse.DirEntry)
   342  	for _, r := range resolutions {
   343  		if r.NextDirComponent != "" {
   344  			ents[r.NextDirComponent] = fuse.DirEntry{
   345  				Name: r.NextDirComponent,
   346  				Mode: fuse.S_IFDIR,
   347  			}
   348  		} else if r.KytheURI != nil {
   349  			req := &ftpb.DirectoryRequest{
   350  				Corpus: r.KytheURI.Corpus,
   351  				Root:   r.KytheURI.Root,
   352  				Path:   r.KytheURI.Path,
   353  			}
   354  			dir, err := me.API.Directory(me.Context, req)
   355  			if err != nil {
   356  				log.Errorf("error fetching dir contents for %q (ticket %q): %v",
   357  					path, r.KytheURI.String(), err)
   358  				return nil, fuse.ENOENT
   359  			}
   360  
   361  			for _, e := range dir.Entry {
   362  				de := fuse.DirEntry{Name: e.Name}
   363  				switch e.Kind {
   364  				case ftpb.DirectoryReply_FILE:
   365  					de.Mode = fuse.S_IFREG
   366  				case ftpb.DirectoryReply_DIRECTORY:
   367  					de.Mode = fuse.S_IFDIR
   368  				default:
   369  					log.Warningf("received invalid directory entry: %v", e)
   370  					continue
   371  				}
   372  				ents[e.Name] = de
   373  			}
   374  		} else {
   375  			log.Fatalf(
   376  				"Programming error: resoultion is neither dir part nor uri for %q",
   377  				path)
   378  		}
   379  	}
   380  	var result []fuse.DirEntry
   381  	for _, v := range ents {
   382  		result = append(result, v)
   383  	}
   384  	return result, fuse.OK
   385  }
   386  
   387  // Open implements a go-fuse stub.
   388  func (me *kytheFS) Open(path string, flags uint32, context *fuse.Context) (file nodefs.File, code fuse.Status) {
   389  	// Read-only filesystem.
   390  	if flags&fuse.O_ANYWRITE != 0 {
   391  		return nil, fuse.EPERM
   392  	}
   393  
   394  	src, err := me.fetchSource(path)
   395  	if err != nil {
   396  		log.Errorf("error fetching source for %q: %v", path, err)
   397  		return nil, fuse.ENOENT
   398  	}
   399  
   400  	return nodefs.NewDataFile(src), fuse.OK
   401  }
   402  
   403  //
   404  // Main
   405  //
   406  
   407  func main() {
   408  	flag.Parse()
   409  	if *serverAddr == "" {
   410  		log.Fatal("You must provide --server address")
   411  	}
   412  	if *mountPoint == "" {
   413  		log.Fatal("You must provide --mountpoint")
   414  	}
   415  
   416  	kytheAPI, err := api.ParseSpec(*serverAddr)
   417  	if err != nil {
   418  		log.Fatal("Failed to parse server address!", *serverAddr)
   419  	}
   420  
   421  	ctx := context.Background()
   422  	defer kytheAPI.Close(ctx)
   423  
   424  	nfs := pathfs.NewPathNodeFs(&kytheFS{
   425  		FileSystem: pathfs.NewDefaultFileSystem(),
   426  		Context:    ctx,
   427  		API:        kytheAPI,
   428  	}, nil)
   429  
   430  	server, _, err := nodefs.MountRoot(*mountPoint, nfs.Root(), nil)
   431  	if err != nil {
   432  		log.Fatalf("Mounting failed: %v", err)
   433  	}
   434  
   435  	server.Serve()
   436  }