kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/storage/tools/directory_indexer/directory_indexer.go (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Binary directory_indexer produces a set of Entry protos representing the
    18  // files in the given directories.
    19  //
    20  // For instance, a file 'kythe/javatests/com/google/devtools/kythe/util/BUILD' would produce two entries:
    21  //
    22  //	{
    23  //	  "fact_name": "/kythe/node/kind",
    24  //	  "fact_value": "file",
    25  //	  "source": {
    26  //	    "signature": "c2b0d93b83c1b0e22fd564278be1b0373b1dcb67ff3bb77c2f29df7c393fe580",
    27  //	    "corpus": "kythe",
    28  //	    "root": "",
    29  //	    "path": "kythe/javatests/com/google/devtools/kythe/util/BUILD",
    30  //	    "language": ""
    31  //	  }
    32  //	}
    33  //	{
    34  //	  "fact_name": "/kythe/text",
    35  //	  "fact_value": "...",
    36  //	  "source": {
    37  //	    "signature": "c2b0d93b83c1b0e22fd564278be1b0373b1dcb67ff3bb77c2f29df7c393fe580",
    38  //	    "corpus": "kythe",
    39  //	    "root": "",
    40  //	    "path": "kythe/javatests/com/google/devtools/kythe/util/BUILD",
    41  //	    "language": ""
    42  //	  }
    43  //	}
    44  //
    45  // Usage:
    46  //
    47  //	directory_indexer --vnames vnames.json \
    48  //	  --exclude '^buildtools,^bazel-,^third_party,~$,#$,(^|/)\.' ~/repo/kythe/
    49  package main
    50  
    51  import (
    52  	"context"
    53  	"crypto/sha256"
    54  	"encoding/hex"
    55  	"flag"
    56  	"os"
    57  	"path/filepath"
    58  	"regexp"
    59  	"strings"
    60  
    61  	"kythe.io/kythe/go/platform/delimited"
    62  	"kythe.io/kythe/go/platform/vfs"
    63  	"kythe.io/kythe/go/util/flagutil"
    64  	"kythe.io/kythe/go/util/log"
    65  	"kythe.io/kythe/go/util/vnameutil"
    66  	spb "kythe.io/kythe/proto/storage_go_proto"
    67  )
    68  
    69  func init() {
    70  	flag.Usage = flagutil.SimpleUsage("Produce a stream of entries representing the files in the given directories",
    71  		"[--verbose] [--emit_irregular] [--vnames path] [--exclude re0,re1,...,reN] [directories]")
    72  }
    73  
// Command-line flags. Each is a pointer that is only meaningful after
// flag.Parse has run in main.
var (
	// vnamesConfigPath names the VNames configuration file that main reads
	// and parses into fileRules.
	vnamesConfigPath = flag.String("vnames", "", "Path to JSON VNames configuration")
	// exclude is split on commas in main; each piece is compiled as a regexp
	// and any path matching one of them is skipped by emitPath.
	exclude          = flag.String("exclude", "", "Comma-separated list of exclude regexp patterns")
	// verbose makes emitPath log every path it reads and emits.
	verbose          = flag.Bool("verbose", false, "Print verbose logging")
	// emitIrregular lets emitPath process non-directory files whose mode is
	// not regular; directories are skipped regardless.
	emitIrregular    = flag.Bool("emit_irregular", false, "Emit nodes for irregular files")
)
    80  
    81  var (
    82  	kindLabel = "/kythe/node/kind"
    83  	textLabel = "/kythe/text"
    84  
    85  	fileKind = []byte("file")
    86  )
    87  
// w is the shared writer, created by delimited.NewWriter over os.Stdout, that
// emitEntry uses for every entry.
// NOTE(review): presumably each record is written length-delimited — confirm
// against the delimited package.
var w = delimited.NewWriter(os.Stdout)
    89  
    90  func emitEntry(v *spb.VName, label string, value []byte) error {
    91  	return w.PutProto(&spb.Entry{Source: v, FactName: label, FactValue: value})
    92  }
    93  
// Package-level state populated once by main before the directory walk and
// then only read by emitPath.
var (
	// fileRules holds the VName rules parsed from the --vnames configuration;
	// emitPath applies them to each path to obtain the file's VName.
	fileRules vnameutil.Rules
	// excludes holds the compiled --exclude patterns; a path matching any of
	// them is skipped.
	excludes  []*regexp.Regexp
)
    98  
    99  func emitPath(path string, info os.FileInfo, err error) error {
   100  	if err != nil {
   101  		return err
   102  	}
   103  	if info.IsDir() || !(*emitIrregular || info.Mode().IsRegular()) {
   104  		return nil
   105  	}
   106  	for _, re := range excludes {
   107  		if re.MatchString(path) {
   108  			return nil
   109  		}
   110  	}
   111  
   112  	if *verbose {
   113  		log.Infof("Reading/emitting %s", path)
   114  	}
   115  	contents, err := vfs.ReadFile(context.Background(), path)
   116  	if err != nil {
   117  		return err
   118  	}
   119  	vName := fileRules.ApplyDefault(path, new(spb.VName))
   120  
   121  	digest := sha256.Sum256(contents)
   122  	vName.Signature = hex.EncodeToString(digest[:])
   123  
   124  	if vName.Path == "" {
   125  		vName.Path = path
   126  	}
   127  
   128  	if err := emitEntry(vName, kindLabel, fileKind); err != nil {
   129  		return err
   130  	}
   131  	return emitEntry(vName, textLabel, contents)
   132  }
   133  
   134  func main() {
   135  	flag.Parse()
   136  
   137  	if *exclude != "" {
   138  		for _, pattern := range strings.Split(*exclude, ",") {
   139  			excludes = append(excludes, regexp.MustCompile(pattern))
   140  		}
   141  	}
   142  
   143  	if data, err := vfs.ReadFile(context.Background(), *vnamesConfigPath); err != nil {
   144  		log.Fatalf("Unable to read VNames config file %q: %v", *vnamesConfigPath, err)
   145  	} else if rules, err := vnameutil.ParseRules(data); err != nil {
   146  		log.Fatalf("Invalid VName rules: %v", err)
   147  	} else {
   148  		fileRules = rules
   149  	}
   150  
   151  	dirs := flag.Args()
   152  	if len(dirs) == 0 {
   153  		dirs = []string{"."}
   154  	}
   155  
   156  	for _, dir := range dirs {
   157  		if err := filepath.Walk(dir, emitPath); err != nil {
   158  			log.Fatalf("Error walking %s: %v", dir, err)
   159  		}
   160  	}
   161  }