kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/filetree.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package pipeline
    18  
    19  import (
    20  	"fmt"
    21  	"path/filepath"
    22  	"reflect"
    23  	"sort"
    24  
    25  	"kythe.io/kythe/go/util/log"
    26  
    27  	"bitbucket.org/creachadair/stringset"
    28  	"kythe.io/kythe/go/serving/pipeline/nodes"
    29  	"kythe.io/kythe/go/util/compare"
    30  	"kythe.io/kythe/go/util/schema/facts"
    31  	kinds "kythe.io/kythe/go/util/schema/nodes"
    32  
    33  	"github.com/apache/beam/sdks/go/pkg/beam"
    34  
    35  	scpb "kythe.io/kythe/proto/schema_go_proto"
    36  	srvpb "kythe.io/kythe/proto/serving_go_proto"
    37  	spb "kythe.io/kythe/proto/storage_go_proto"
    38  )
    39  
    40  func init() {
    41  	beam.RegisterFunction(addCorpusRootsKey)
    42  	beam.RegisterFunction(anchorToBuildConfig)
    43  	beam.RegisterFunction(anchorToCorpusRoot)
    44  	beam.RegisterFunction(anchorToFileBuildConfig)
    45  	beam.RegisterFunction(fileToCorpusRoot)
    46  	beam.RegisterFunction(fileToDirectories)
    47  
    48  	beam.RegisterType(reflect.TypeOf((*combineCorpusRoots)(nil)).Elem())
    49  	beam.RegisterType(reflect.TypeOf((*combineDirectories)(nil)).Elem())
    50  }
    51  
    52  func (k *KytheBeam) getFileVNames() beam.PCollection {
    53  	if k.fileVNames.IsValid() {
    54  		return k.fileVNames
    55  	}
    56  	k.fileVNames = beam.DropValue(k.s, beam.Seq(k.s, k.nodes, &nodes.Filter{
    57  		FilterByKind: []string{kinds.File},
    58  		IncludeFacts: []string{},
    59  		IncludeEdges: []string{},
    60  	}, moveSourceToKey))
    61  	return k.fileVNames
    62  }
    63  
    64  func (k *KytheBeam) getAnchorBuildConfigs() beam.PCollection {
    65  	if k.anchorBuildConfigs.IsValid() {
    66  		return k.anchorBuildConfigs
    67  	}
    68  	k.anchorBuildConfigs = beam.Seq(k.s, k.nodes, &nodes.Filter{
    69  		FilterByKind: []string{kinds.Anchor},
    70  		IncludeFacts: []string{facts.BuildConfig},
    71  		IncludeEdges: []string{},
    72  	}, anchorToBuildConfig)
    73  	return k.anchorBuildConfigs
    74  }
    75  
    76  func anchorToBuildConfig(anchor *scpb.Node) (*spb.VName, string) {
    77  	var buildConfig string
    78  	for _, f := range anchor.Fact {
    79  		if f.GetKytheName() == scpb.FactName_BUILD_CONFIG {
    80  			buildConfig = string(f.Value)
    81  			break
    82  		}
    83  	}
    84  	return anchor.Source, buildConfig
    85  }
    86  
    87  // CorpusRoots returns the single *srvpb.CorpusRoots key-value for the Kythe
    88  // FileTree service.  The beam.PCollection has elements of type KV<string,
    89  // *srvpb.CorpusRoots>.
    90  func (k *KytheBeam) CorpusRoots() beam.PCollection {
    91  	s := k.s.Scope("CorpusRoots")
    92  	files := k.getFileVNames()
    93  	anchors := k.getAnchorBuildConfigs()
    94  	return beam.ParDo(s, addCorpusRootsKey,
    95  		beam.Combine(s, &combineCorpusRoots{}, beam.Flatten(s,
    96  			beam.ParDo(s, fileToCorpusRoot, files),
    97  			beam.ParDo(s, anchorToCorpusRoot, anchors),
    98  		)))
    99  }
   100  
   101  // Directories returns a Kythe *srvpb.FileDirectory table for the Kythe FileTree
   102  // service.  The beam.PCollection has elements of type KV<string,
   103  // *srvpb.FileDirectory>.
   104  func (k *KytheBeam) Directories() beam.PCollection {
   105  	s := k.s.Scope("Directories")
   106  	files := k.getFileVNames()
   107  	anchors := k.getAnchorBuildConfigs()
   108  	return beam.CombinePerKey(s, &combineDirectories{}, beam.Flatten(s,
   109  		beam.ParDo(s, fileToDirectories, files),
   110  		beam.ParDo(s, anchorToFileBuildConfig, anchors),
   111  	))
   112  }
   113  
   114  // addCorpusRootsKey returns the given value with the Kythe corpus roots key constant.
   115  func addCorpusRootsKey(val beam.T) (string, beam.T) { return "dirs:corpusRoots", val }
   116  
   117  func dirTicket(corpus, root, dir string) string {
   118  	return fmt.Sprintf("dirs:%s\n%s\n%s", corpus, root, dir)
   119  }
   120  
   121  // anchorToFileBuildConfig emits a FileDirectory for each path component in the
   122  // given anchor VName with its specified build config.
   123  func anchorToFileBuildConfig(anchor *spb.VName, buildConfig string, emit func(string, *srvpb.FileDirectory)) {
   124  	// Clean the file path and remove any leading slash.
   125  	path := filepath.Clean(filepath.Join("/", anchor.GetPath()))[1:]
   126  	dir := currentAsEmpty(filepath.Dir(path))
   127  	buildConfigs := []string{buildConfig}
   128  
   129  	corpus, root := anchor.GetCorpus(), anchor.GetRoot()
   130  	emit(dirTicket(corpus, root, dir), &srvpb.FileDirectory{
   131  		Entry: []*srvpb.FileDirectory_Entry{{
   132  			Name:        filepath.Base(path),
   133  			Kind:        srvpb.FileDirectory_FILE,
   134  			BuildConfig: buildConfigs,
   135  		}},
   136  	})
   137  
   138  	for dir != "" {
   139  		name := filepath.Base(dir)
   140  		dir = currentAsEmpty(filepath.Dir(dir))
   141  		emit(dirTicket(corpus, root, dir), &srvpb.FileDirectory{
   142  			Entry: []*srvpb.FileDirectory_Entry{{
   143  				Name:        name,
   144  				Kind:        srvpb.FileDirectory_DIRECTORY,
   145  				BuildConfig: buildConfigs,
   146  			}},
   147  		})
   148  	}
   149  }
   150  
   151  // fileToDirectories emits a FileDirectory for each path component in the given file VName.
   152  func fileToDirectories(file *spb.VName, emit func(string, *srvpb.FileDirectory)) {
   153  	// Clean the file path and remove any leading slash.
   154  	path := filepath.Clean(filepath.Join("/", file.GetPath()))[1:]
   155  
   156  	dir := currentAsEmpty(filepath.Dir(path))
   157  	emit(dirTicket(file.Corpus, file.Root, dir), &srvpb.FileDirectory{
   158  		Entry: []*srvpb.FileDirectory_Entry{{
   159  			Name: filepath.Base(path),
   160  			Kind: srvpb.FileDirectory_FILE,
   161  		}},
   162  	})
   163  	for dir != "" {
   164  		name := filepath.Base(dir)
   165  		dir = currentAsEmpty(filepath.Dir(dir))
   166  		emit(dirTicket(file.Corpus, file.Root, dir), &srvpb.FileDirectory{
   167  			Entry: []*srvpb.FileDirectory_Entry{{
   168  				Name: name,
   169  				Kind: srvpb.FileDirectory_DIRECTORY,
   170  			}},
   171  		})
   172  	}
   173  }
   174  
   175  func currentAsEmpty(p string) string {
   176  	if p == "." {
   177  		return ""
   178  	}
   179  	return p
   180  }
   181  
   182  // fileToCorpusRoot returns a CorpusRoots for the given file VName.
   183  func fileToCorpusRoot(file *spb.VName) *srvpb.CorpusRoots {
   184  	return &srvpb.CorpusRoots{
   185  		Corpus: []*srvpb.CorpusRoots_Corpus{{
   186  			Corpus: file.Corpus,
   187  			Root:   []string{file.Root},
   188  		}},
   189  	}
   190  }
   191  
   192  // anchorToCorpusRoot returns a CorpusRoots for the anchor VName and build config.
   193  func anchorToCorpusRoot(anchor *spb.VName, buildConfig string) *srvpb.CorpusRoots {
   194  	return &srvpb.CorpusRoots{
   195  		Corpus: []*srvpb.CorpusRoots_Corpus{{
   196  			Corpus:      anchor.Corpus,
   197  			Root:        []string{anchor.Root},
   198  			BuildConfig: []string{buildConfig},
   199  		}},
   200  	}
   201  }
   202  
   203  type combineCorpusRoots struct{}
   204  
   205  func (combineCorpusRoots) MergeAccumulators(accum, cr *srvpb.CorpusRoots) *srvpb.CorpusRoots {
   206  	for _, c := range cr.Corpus {
   207  		var corpus *srvpb.CorpusRoots_Corpus
   208  		for _, cc := range accum.Corpus {
   209  			if cc.Corpus == c.Corpus {
   210  				corpus = cc
   211  				break
   212  			}
   213  		}
   214  		if corpus == nil {
   215  			corpus = &srvpb.CorpusRoots_Corpus{Corpus: c.Corpus}
   216  			accum.Corpus = append(accum.Corpus, corpus)
   217  		}
   218  		corpus.Root = append(corpus.Root, c.Root...)
   219  		corpus.BuildConfig = append(corpus.BuildConfig, c.BuildConfig...)
   220  	}
   221  	return accum
   222  }
   223  
   224  func (combineCorpusRoots) ExtractOutput(cr *srvpb.CorpusRoots) *srvpb.CorpusRoots {
   225  	sort.Slice(cr.Corpus, func(i, j int) bool { return cr.Corpus[i].Corpus < cr.Corpus[j].Corpus })
   226  	for _, c := range cr.Corpus {
   227  		c.Root = removeDuplicates(c.Root)
   228  		c.BuildConfig = removeDuplicates(c.BuildConfig)
   229  	}
   230  	return cr
   231  }
   232  
   233  type combineDirectories struct{}
   234  
   235  func (combineDirectories) MergeAccumulators(accum, dir *srvpb.FileDirectory) *srvpb.FileDirectory {
   236  	accum.Entry = append(accum.Entry, dir.Entry...)
   237  	return accum
   238  }
   239  
   240  func (combineDirectories) ExtractOutput(dir *srvpb.FileDirectory) *srvpb.FileDirectory {
   241  	files := make(map[string]stringset.Set)
   242  	subdirs := make(map[string]stringset.Set)
   243  	for _, e := range dir.Entry {
   244  		switch e.Kind {
   245  		case srvpb.FileDirectory_FILE:
   246  			if configs, ok := files[e.Name]; ok {
   247  				configs.Add(e.BuildConfig...)
   248  			} else {
   249  				files[e.Name] = stringset.New(e.BuildConfig...)
   250  			}
   251  		case srvpb.FileDirectory_DIRECTORY:
   252  			if configs, ok := subdirs[e.Name]; ok {
   253  				configs.Add(e.BuildConfig...)
   254  			} else {
   255  				subdirs[e.Name] = stringset.New(e.BuildConfig...)
   256  			}
   257  		default:
   258  			log.Warningf("unknown FileDirectory kind: %v", e.Kind)
   259  		}
   260  	}
   261  	entries := make([]*srvpb.FileDirectory_Entry, 0, len(files)+len(subdirs))
   262  	for file, configs := range files {
   263  		entries = append(entries, &srvpb.FileDirectory_Entry{
   264  			Kind:        srvpb.FileDirectory_FILE,
   265  			Name:        file,
   266  			BuildConfig: configs.Elements(),
   267  		})
   268  	}
   269  	for subdir, configs := range subdirs {
   270  		entries = append(entries, &srvpb.FileDirectory_Entry{
   271  			Kind:        srvpb.FileDirectory_DIRECTORY,
   272  			Name:        subdir,
   273  			BuildConfig: configs.Elements(),
   274  		})
   275  	}
   276  	sort.Slice(entries, func(i, j int) bool {
   277  		return compare.Ints(int(entries[i].Kind), int(entries[j].Kind)).
   278  			AndThen(entries[i].Name, entries[j].Name) == compare.LT
   279  	})
   280  	return &srvpb.FileDirectory{Entry: entries}
   281  }
   282  
   283  func removeDuplicates(strs []string) []string {
   284  	if len(strs) <= 1 {
   285  		return strs
   286  	}
   287  	sort.Strings(strs)
   288  	j := 1
   289  	for i := 1; i < len(strs); i++ {
   290  		if strs[j-1] != strs[i] {
   291  			strs[j] = strs[i]
   292  			j++
   293  		}
   294  	}
   295  	return strs[:j]
   296  }