kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/tools/kythefs/kythefs.go (about) 1 /* 2 * Copyright 2019 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Binary KytheFS exposes file content stored in Kythe as a virtual filesystem. 18 // 19 // Example usage: 20 // 21 // bazel build kythe/go/serving/tools/kythefs 22 // # Blocks until unmounted: 23 // ./bazel-bin/kythe/go/serving/tools/kythefs/kythefs --mountpoint vfs_dir 24 // 25 // # To unmount: 26 // fusermount -u vfs_dir 27 package main 28 29 import ( 30 "context" 31 "flag" 32 "fmt" 33 "os" 34 "path/filepath" 35 "strings" 36 37 "kythe.io/kythe/go/serving/api" 38 "kythe.io/kythe/go/util/flagutil" 39 "kythe.io/kythe/go/util/kytheuri" 40 "kythe.io/kythe/go/util/log" 41 "kythe.io/kythe/go/util/schema/facts" 42 43 "github.com/hanwen/go-fuse/fuse" 44 "github.com/hanwen/go-fuse/fuse/nodefs" 45 "github.com/hanwen/go-fuse/fuse/pathfs" 46 47 ftpb "kythe.io/kythe/proto/filetree_go_proto" 48 gpb "kythe.io/kythe/proto/graph_go_proto" 49 xpb "kythe.io/kythe/proto/xref_go_proto" 50 ) 51 52 var ( 53 serverAddr = flag.String("server", "http://localhost:8080", 54 "The address of the Kythe service to use. For example http://localhost:8080.") 55 mountPoint = flag.String("mountpoint", "", 56 "Path to existing directory to mount KytheFS at.") 57 ) 58 59 func init() { 60 flag.Usage = flagutil.SimpleUsage("Mounts file content stored in Kythe as a virtual filesystem.", 61 "The files are laid out on a path <corpus>/<root>/<path>.", 62 "(--mountpoint MOUNT_PATH)", 63 "[--server SERVER_ADDRESS]") 64 } 65 66 type kytheFS struct { 67 pathfs.FileSystem 68 Context context.Context 69 API api.Interface 70 71 WarnedEmptyCorpus bool 72 WarnedOverlappingPrefix bool 73 } 74 75 // A FilepathResolution is the result of mapping a vfs path into a Kythe uri 76 // component or a proper KytheUri. The mapping happens in the context of a given 77 // corpus+root. 78 // 79 // Only one of the fields has a non-default value, depending on the resolution. 80 type FilepathResolution struct { 81 // Present if the filepath was resolved to a Kythe URI. 82 // 83 // For example, given corpus "foo" and root "bar/baz", KytheUri will resolve 84 // as follows: 85 // 86 // filepath to resolve | KytheUri 87 // "foo/bar/baz" | "kythe://foo?root=bar/baz?path=" 88 // "foo/bar/baz/quux" | "kythe://foo?root=bar/baz?path=quux" 89 // 90 KytheURI *kytheuri.URI 91 92 // Present if the filepath is a prefix of a given corpus+root. 93 // Marks the next filepath component on the corpus+root filepath. 94 // 95 // For example, given corpus "foo" and root "bar/baz", NextDirComponent will 96 // resolve as follows: 97 // 98 // filepath to resolve | NextDirComponent 99 // "" | "foo" 100 // "foo" | "bar" 101 // "foo/bar" | "baz" 102 // 103 NextDirComponent string 104 } 105 106 // hasDirComponent returns true if 'rs' contains a 'NextDirComponent' resolution. 107 func hasDirComponent(rs []FilepathResolution) bool { 108 for _, r := range rs { 109 if r.NextDirComponent != "" { 110 return true 111 } 112 } 113 return false 114 } 115 116 // ResolveFilepath returns alternative resolutions of a given vfs path. 117 // The multiple resolutions are due to ambiguity, which occurs due to: 118 // 119 // a) The queried path pointing into a prefix of some corpus+root vfs path. 120 // 121 // b) Overlapping corpus+root+path vfs paths. If happens, you likely need to 122 // adjust the extractor's vname mapping config. 123 func (me *kytheFS) ResolveFilepath(path string) ([]FilepathResolution, error) { 124 var req ftpb.CorpusRootsRequest 125 cr, err := me.API.CorpusRoots(me.Context, &req) 126 if err != nil { 127 return nil, err 128 } 129 130 // Set to dedup next-dirs, as they can be common against multiple corpus+root 131 // pairs. 132 nextDirs := make(map[string]bool) 133 134 var ticketResolution *FilepathResolution 135 136 sep := string(os.PathSeparator) 137 138 if path == "" { 139 // List top-level vfs dirs, which are the first components of corpus names. 140 for _, corpus := range me.NonEmptyCorpora(cr.Corpus) { 141 parts := strings.SplitN(corpus.Name, sep, 2) 142 nextDirs[parts[0]] = true 143 } 144 } else { 145 // Given a vfs directory path, collect the listings visible in that 146 // directory. These are the following directory components and maybe a 147 // resolved/ Kythe URI of a matching file. 148 149 // If a path could resolve to multiple URIs, due to an overlap in 150 // corpus+root+path_prefix, we arbitrary prefer the one matching the one 151 // with the longer corpus+root. 152 // 153 // We could alternatively check existence of the path with API calls, but 154 // this situation shouldn't normally arise unless extraction config is broken. 155 var longestCorpusRootForTicket string 156 157 for _, corpus := range me.NonEmptyCorpora(cr.Corpus) { 158 for _, root := range corpus.Root { 159 crPath := filepath.Join(corpus.Name, root) 160 crRemain := strings.TrimPrefix(crPath, path) 161 vfsRemain := strings.TrimPrefix(path, crPath) 162 if len(vfsRemain) < len(path) && 163 (vfsRemain == "" || vfsRemain[:1] == sep) && 164 len(crPath) > len(longestCorpusRootForTicket) { 165 166 if longestCorpusRootForTicket != "" && !me.WarnedOverlappingPrefix { 167 log.Warningf("There is at least one overlap in corpus+root+path_prefix. "+ 168 "Path: %q (corpus+root %q), corpus+root of conflicting path: %q ", 169 path, crPath, longestCorpusRootForTicket) 170 me.WarnedOverlappingPrefix = true 171 } 172 173 longestCorpusRootForTicket = crPath 174 // Points inside corpus+root, match using the ticket. 175 var p string // Exact corpus+root match. 176 if vfsRemain != "" { 177 p = vfsRemain[1:] // Additional kythe path. 178 } 179 ticketResolution = &FilepathResolution{ 180 KytheURI: &kytheuri.URI{ 181 Corpus: corpus.Name, 182 Root: root, 183 Path: p, 184 }, 185 } 186 } else if len(crRemain) < len(crPath) && crRemain[:1] == sep { 187 // Queried path is a proper prefix. 188 parts := strings.SplitN(crRemain[1:], sep, 2) 189 nextDirs[parts[0]] = true 190 } 191 } 192 } 193 } 194 195 var results []FilepathResolution 196 for k := range nextDirs { 197 res := FilepathResolution{NextDirComponent: k} 198 results = append(results, res) 199 } 200 if ticketResolution != nil { 201 results = append(results, *ticketResolution) 202 } 203 return results, nil 204 } 205 206 // NonEmptyCorpora returns the non-empty named corpuses, and warns the first time 207 // an empty corpus name is encountered. 208 // 209 // Empty corpus names are not worth the trouble for special handling, given 210 // that naming corpora comes without drawbacks and is a good practice. 211 func (me *kytheFS) NonEmptyCorpora(cs []*ftpb.CorpusRootsReply_Corpus) []*ftpb.CorpusRootsReply_Corpus { 212 var res []*ftpb.CorpusRootsReply_Corpus 213 for _, c := range cs { 214 if c.Name != "" { 215 res = append(res, c) 216 } else if !me.WarnedEmptyCorpus { 217 log.Warningf("found empty corpus name, skipping mapping! " + 218 "Please set a corpus when extracting or indexing.") 219 me.WarnedEmptyCorpus = true 220 } 221 } 222 return res 223 } 224 225 // IsDirectory returns true if the given Kythe URI corresponds to a directory. 226 // 227 // Actually it checks that the path is not a known file. Could also use the 228 // filetree api to check contents of the parent. But I expect this code to 229 // change when caching is added, then we will determine directory-ness from 230 // a local cache (and it will be a separate concern how we fill that cache). 231 func (me *kytheFS) IsDirectory(uri *kytheuri.URI) (bool, error) { 232 ticket := uri.String() 233 req := &gpb.NodesRequest{ 234 Ticket: []string{ticket}, 235 // Minimize amount of data returned, ask for NodeKind only. 236 Filter: []string{facts.NodeKind}, 237 } 238 res, err := me.API.Nodes(me.Context, req) 239 if err != nil { 240 return false, nil 241 } 242 for k, n := range res.Nodes { 243 if k != ticket { 244 continue 245 } 246 if len(n.Facts) > 0 { 247 // Directory entries don't have any facts. 248 return false, nil 249 } 250 } 251 return true, nil 252 } 253 254 func (me *kytheFS) fetchSourceForURI(uri *kytheuri.URI) ([]byte, error) { 255 ticket := uri.String() 256 dec, err := me.API.Decorations(me.Context, &xpb.DecorationsRequest{ 257 Location: &xpb.Location{Ticket: ticket}, 258 SourceText: true, 259 }) 260 if err != nil { 261 return nil, err 262 } 263 return dec.SourceText, nil 264 } 265 266 func (me *kytheFS) fetchSource(path string) ([]byte, error) { 267 resolutions, err := me.ResolveFilepath(path) 268 if err != nil { 269 return nil, err 270 } 271 272 for _, r := range resolutions { 273 if r.KytheURI == nil { 274 continue 275 } 276 src, err := me.fetchSourceForURI(r.KytheURI) 277 if err != nil { 278 return nil, fmt.Errorf( 279 "no xrefs for %q (resolved to ticket %q): %v", 280 path, r.KytheURI.String(), err) 281 } 282 283 return src, nil 284 } 285 286 return nil, fmt.Errorf("couldn't resolve path %q to a ticket", path) 287 } 288 289 // GetAttr implements a go-fuse stub. 290 func (me *kytheFS) GetAttr(path string, context *fuse.Context) (*fuse.Attr, fuse.Status) { 291 resolutions, err := me.ResolveFilepath(path) 292 if err != nil { 293 log.Errorf("resolution error for %q: %v", path, err) 294 return nil, fuse.ENOENT 295 } 296 297 if hasDirComponent(resolutions) { 298 return &fuse.Attr{ 299 Mode: fuse.S_IFDIR | 0755, 300 }, fuse.OK 301 } 302 303 for _, r := range resolutions { 304 if r.KytheURI == nil { 305 continue 306 } 307 308 isDir, err := me.IsDirectory(r.KytheURI) 309 if err != nil { 310 return nil, fuse.ENOENT 311 } 312 313 if isDir { 314 return &fuse.Attr{ 315 Mode: fuse.S_IFDIR | 0755, 316 }, fuse.OK 317 } 318 319 src, err := me.fetchSourceForURI(r.KytheURI) 320 if err != nil { 321 return nil, fuse.ENOENT 322 } 323 return &fuse.Attr{ 324 Mode: fuse.S_IFREG | 0644, Size: uint64(len(src)), 325 }, fuse.OK 326 } 327 return nil, fuse.ENOENT 328 } 329 330 // OpenDir implements a go-fuse stub. 331 func (me *kytheFS) OpenDir(path string, context *fuse.Context) (c []fuse.DirEntry, code fuse.Status) { 332 resolutions, err := me.ResolveFilepath(path) 333 if err != nil { 334 log.Errorf("resolution error for %q: %v", path, err) 335 return nil, fuse.ENOENT 336 } 337 338 // Key by path component, since when a corpus+root segment overlaps with 339 // an actual deep path from an other corpus+root, we could get duplicate 340 // components otherwise. 341 ents := make(map[string]fuse.DirEntry) 342 for _, r := range resolutions { 343 if r.NextDirComponent != "" { 344 ents[r.NextDirComponent] = fuse.DirEntry{ 345 Name: r.NextDirComponent, 346 Mode: fuse.S_IFDIR, 347 } 348 } else if r.KytheURI != nil { 349 req := &ftpb.DirectoryRequest{ 350 Corpus: r.KytheURI.Corpus, 351 Root: r.KytheURI.Root, 352 Path: r.KytheURI.Path, 353 } 354 dir, err := me.API.Directory(me.Context, req) 355 if err != nil { 356 log.Errorf("error fetching dir contents for %q (ticket %q): %v", 357 path, r.KytheURI.String(), err) 358 return nil, fuse.ENOENT 359 } 360 361 for _, e := range dir.Entry { 362 de := fuse.DirEntry{Name: e.Name} 363 switch e.Kind { 364 case ftpb.DirectoryReply_FILE: 365 de.Mode = fuse.S_IFREG 366 case ftpb.DirectoryReply_DIRECTORY: 367 de.Mode = fuse.S_IFDIR 368 default: 369 log.Warningf("received invalid directory entry: %v", e) 370 continue 371 } 372 ents[e.Name] = de 373 } 374 } else { 375 log.Fatalf( 376 "Programming error: resoultion is neither dir part nor uri for %q", 377 path) 378 } 379 } 380 var result []fuse.DirEntry 381 for _, v := range ents { 382 result = append(result, v) 383 } 384 return result, fuse.OK 385 } 386 387 // Open implements a go-fuse stub. 388 func (me *kytheFS) Open(path string, flags uint32, context *fuse.Context) (file nodefs.File, code fuse.Status) { 389 // Read-only filesystem. 390 if flags&fuse.O_ANYWRITE != 0 { 391 return nil, fuse.EPERM 392 } 393 394 src, err := me.fetchSource(path) 395 if err != nil { 396 log.Errorf("error fetching source for %q: %v", path, err) 397 return nil, fuse.ENOENT 398 } 399 400 return nodefs.NewDataFile(src), fuse.OK 401 } 402 403 // 404 // Main 405 // 406 407 func main() { 408 flag.Parse() 409 if *serverAddr == "" { 410 log.Fatal("You must provide --server address") 411 } 412 if *mountPoint == "" { 413 log.Fatal("You must provide --mountpoint") 414 } 415 416 kytheAPI, err := api.ParseSpec(*serverAddr) 417 if err != nil { 418 log.Fatal("Failed to parse server address!", *serverAddr) 419 } 420 421 ctx := context.Background() 422 defer kytheAPI.Close(ctx) 423 424 nfs := pathfs.NewPathNodeFs(&kytheFS{ 425 FileSystem: pathfs.NewDefaultFileSystem(), 426 Context: ctx, 427 API: kytheAPI, 428 }, nil) 429 430 server, _, err := nodefs.MountRoot(*mountPoint, nfs.Root(), nil) 431 if err != nil { 432 log.Fatalf("Mounting failed: %v", err) 433 } 434 435 server.Serve() 436 }