kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/serving/pipeline/filetree.go (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package pipeline 18 19 import ( 20 "fmt" 21 "path/filepath" 22 "reflect" 23 "sort" 24 25 "kythe.io/kythe/go/util/log" 26 27 "bitbucket.org/creachadair/stringset" 28 "kythe.io/kythe/go/serving/pipeline/nodes" 29 "kythe.io/kythe/go/util/compare" 30 "kythe.io/kythe/go/util/schema/facts" 31 kinds "kythe.io/kythe/go/util/schema/nodes" 32 33 "github.com/apache/beam/sdks/go/pkg/beam" 34 35 scpb "kythe.io/kythe/proto/schema_go_proto" 36 srvpb "kythe.io/kythe/proto/serving_go_proto" 37 spb "kythe.io/kythe/proto/storage_go_proto" 38 ) 39 40 func init() { 41 beam.RegisterFunction(addCorpusRootsKey) 42 beam.RegisterFunction(anchorToBuildConfig) 43 beam.RegisterFunction(anchorToCorpusRoot) 44 beam.RegisterFunction(anchorToFileBuildConfig) 45 beam.RegisterFunction(fileToCorpusRoot) 46 beam.RegisterFunction(fileToDirectories) 47 48 beam.RegisterType(reflect.TypeOf((*combineCorpusRoots)(nil)).Elem()) 49 beam.RegisterType(reflect.TypeOf((*combineDirectories)(nil)).Elem()) 50 } 51 52 func (k *KytheBeam) getFileVNames() beam.PCollection { 53 if k.fileVNames.IsValid() { 54 return k.fileVNames 55 } 56 k.fileVNames = beam.DropValue(k.s, beam.Seq(k.s, k.nodes, &nodes.Filter{ 57 FilterByKind: []string{kinds.File}, 58 IncludeFacts: []string{}, 59 IncludeEdges: []string{}, 60 }, moveSourceToKey)) 61 return k.fileVNames 62 } 63 64 func (k *KytheBeam) getAnchorBuildConfigs() beam.PCollection { 65 if k.anchorBuildConfigs.IsValid() { 66 return k.anchorBuildConfigs 67 } 68 k.anchorBuildConfigs = beam.Seq(k.s, k.nodes, &nodes.Filter{ 69 FilterByKind: []string{kinds.Anchor}, 70 IncludeFacts: []string{facts.BuildConfig}, 71 IncludeEdges: []string{}, 72 }, anchorToBuildConfig) 73 return k.anchorBuildConfigs 74 } 75 76 func anchorToBuildConfig(anchor *scpb.Node) (*spb.VName, string) { 77 var buildConfig string 78 for _, f := range anchor.Fact { 79 if f.GetKytheName() == scpb.FactName_BUILD_CONFIG { 80 buildConfig = string(f.Value) 81 break 82 } 83 } 84 return anchor.Source, buildConfig 85 } 86 87 // CorpusRoots returns the single *srvpb.CorpusRoots key-value for the Kythe 88 // FileTree service. The beam.PCollection has elements of type KV<string, 89 // *srvpb.CorpusRoots>. 90 func (k *KytheBeam) CorpusRoots() beam.PCollection { 91 s := k.s.Scope("CorpusRoots") 92 files := k.getFileVNames() 93 anchors := k.getAnchorBuildConfigs() 94 return beam.ParDo(s, addCorpusRootsKey, 95 beam.Combine(s, &combineCorpusRoots{}, beam.Flatten(s, 96 beam.ParDo(s, fileToCorpusRoot, files), 97 beam.ParDo(s, anchorToCorpusRoot, anchors), 98 ))) 99 } 100 101 // Directories returns a Kythe *srvpb.FileDirectory table for the Kythe FileTree 102 // service. The beam.PCollection has elements of type KV<string, 103 // *srvpb.FileDirectory>. 104 func (k *KytheBeam) Directories() beam.PCollection { 105 s := k.s.Scope("Directories") 106 files := k.getFileVNames() 107 anchors := k.getAnchorBuildConfigs() 108 return beam.CombinePerKey(s, &combineDirectories{}, beam.Flatten(s, 109 beam.ParDo(s, fileToDirectories, files), 110 beam.ParDo(s, anchorToFileBuildConfig, anchors), 111 )) 112 } 113 114 // addCorpusRootsKey returns the given value with the Kythe corpus roots key constant. 115 func addCorpusRootsKey(val beam.T) (string, beam.T) { return "dirs:corpusRoots", val } 116 117 func dirTicket(corpus, root, dir string) string { 118 return fmt.Sprintf("dirs:%s\n%s\n%s", corpus, root, dir) 119 } 120 121 // anchorToFileBuildConfig emits a FileDirectory for each path component in the 122 // given anchor VName with its specified build config. 123 func anchorToFileBuildConfig(anchor *spb.VName, buildConfig string, emit func(string, *srvpb.FileDirectory)) { 124 // Clean the file path and remove any leading slash. 125 path := filepath.Clean(filepath.Join("/", anchor.GetPath()))[1:] 126 dir := currentAsEmpty(filepath.Dir(path)) 127 buildConfigs := []string{buildConfig} 128 129 corpus, root := anchor.GetCorpus(), anchor.GetRoot() 130 emit(dirTicket(corpus, root, dir), &srvpb.FileDirectory{ 131 Entry: []*srvpb.FileDirectory_Entry{{ 132 Name: filepath.Base(path), 133 Kind: srvpb.FileDirectory_FILE, 134 BuildConfig: buildConfigs, 135 }}, 136 }) 137 138 for dir != "" { 139 name := filepath.Base(dir) 140 dir = currentAsEmpty(filepath.Dir(dir)) 141 emit(dirTicket(corpus, root, dir), &srvpb.FileDirectory{ 142 Entry: []*srvpb.FileDirectory_Entry{{ 143 Name: name, 144 Kind: srvpb.FileDirectory_DIRECTORY, 145 BuildConfig: buildConfigs, 146 }}, 147 }) 148 } 149 } 150 151 // fileToDirectories emits a FileDirectory for each path component in the given file VName. 152 func fileToDirectories(file *spb.VName, emit func(string, *srvpb.FileDirectory)) { 153 // Clean the file path and remove any leading slash. 154 path := filepath.Clean(filepath.Join("/", file.GetPath()))[1:] 155 156 dir := currentAsEmpty(filepath.Dir(path)) 157 emit(dirTicket(file.Corpus, file.Root, dir), &srvpb.FileDirectory{ 158 Entry: []*srvpb.FileDirectory_Entry{{ 159 Name: filepath.Base(path), 160 Kind: srvpb.FileDirectory_FILE, 161 }}, 162 }) 163 for dir != "" { 164 name := filepath.Base(dir) 165 dir = currentAsEmpty(filepath.Dir(dir)) 166 emit(dirTicket(file.Corpus, file.Root, dir), &srvpb.FileDirectory{ 167 Entry: []*srvpb.FileDirectory_Entry{{ 168 Name: name, 169 Kind: srvpb.FileDirectory_DIRECTORY, 170 }}, 171 }) 172 } 173 } 174 175 func currentAsEmpty(p string) string { 176 if p == "." { 177 return "" 178 } 179 return p 180 } 181 182 // fileToCorpusRoot returns a CorpusRoots for the given file VName. 183 func fileToCorpusRoot(file *spb.VName) *srvpb.CorpusRoots { 184 return &srvpb.CorpusRoots{ 185 Corpus: []*srvpb.CorpusRoots_Corpus{{ 186 Corpus: file.Corpus, 187 Root: []string{file.Root}, 188 }}, 189 } 190 } 191 192 // anchorToCorpusRoot returns a CorpusRoots for the anchor VName and build config. 193 func anchorToCorpusRoot(anchor *spb.VName, buildConfig string) *srvpb.CorpusRoots { 194 return &srvpb.CorpusRoots{ 195 Corpus: []*srvpb.CorpusRoots_Corpus{{ 196 Corpus: anchor.Corpus, 197 Root: []string{anchor.Root}, 198 BuildConfig: []string{buildConfig}, 199 }}, 200 } 201 } 202 203 type combineCorpusRoots struct{} 204 205 func (combineCorpusRoots) MergeAccumulators(accum, cr *srvpb.CorpusRoots) *srvpb.CorpusRoots { 206 for _, c := range cr.Corpus { 207 var corpus *srvpb.CorpusRoots_Corpus 208 for _, cc := range accum.Corpus { 209 if cc.Corpus == c.Corpus { 210 corpus = cc 211 break 212 } 213 } 214 if corpus == nil { 215 corpus = &srvpb.CorpusRoots_Corpus{Corpus: c.Corpus} 216 accum.Corpus = append(accum.Corpus, corpus) 217 } 218 corpus.Root = append(corpus.Root, c.Root...) 219 corpus.BuildConfig = append(corpus.BuildConfig, c.BuildConfig...) 220 } 221 return accum 222 } 223 224 func (combineCorpusRoots) ExtractOutput(cr *srvpb.CorpusRoots) *srvpb.CorpusRoots { 225 sort.Slice(cr.Corpus, func(i, j int) bool { return cr.Corpus[i].Corpus < cr.Corpus[j].Corpus }) 226 for _, c := range cr.Corpus { 227 c.Root = removeDuplicates(c.Root) 228 c.BuildConfig = removeDuplicates(c.BuildConfig) 229 } 230 return cr 231 } 232 233 type combineDirectories struct{} 234 235 func (combineDirectories) MergeAccumulators(accum, dir *srvpb.FileDirectory) *srvpb.FileDirectory { 236 accum.Entry = append(accum.Entry, dir.Entry...) 237 return accum 238 } 239 240 func (combineDirectories) ExtractOutput(dir *srvpb.FileDirectory) *srvpb.FileDirectory { 241 files := make(map[string]stringset.Set) 242 subdirs := make(map[string]stringset.Set) 243 for _, e := range dir.Entry { 244 switch e.Kind { 245 case srvpb.FileDirectory_FILE: 246 if configs, ok := files[e.Name]; ok { 247 configs.Add(e.BuildConfig...) 248 } else { 249 files[e.Name] = stringset.New(e.BuildConfig...) 250 } 251 case srvpb.FileDirectory_DIRECTORY: 252 if configs, ok := subdirs[e.Name]; ok { 253 configs.Add(e.BuildConfig...) 254 } else { 255 subdirs[e.Name] = stringset.New(e.BuildConfig...) 256 } 257 default: 258 log.Warningf("unknown FileDirectory kind: %v", e.Kind) 259 } 260 } 261 entries := make([]*srvpb.FileDirectory_Entry, 0, len(files)+len(subdirs)) 262 for file, configs := range files { 263 entries = append(entries, &srvpb.FileDirectory_Entry{ 264 Kind: srvpb.FileDirectory_FILE, 265 Name: file, 266 BuildConfig: configs.Elements(), 267 }) 268 } 269 for subdir, configs := range subdirs { 270 entries = append(entries, &srvpb.FileDirectory_Entry{ 271 Kind: srvpb.FileDirectory_DIRECTORY, 272 Name: subdir, 273 BuildConfig: configs.Elements(), 274 }) 275 } 276 sort.Slice(entries, func(i, j int) bool { 277 return compare.Ints(int(entries[i].Kind), int(entries[j].Kind)). 278 AndThen(entries[i].Name, entries[j].Name) == compare.LT 279 }) 280 return &srvpb.FileDirectory{Entry: entries} 281 } 282 283 func removeDuplicates(strs []string) []string { 284 if len(strs) <= 1 { 285 return strs 286 } 287 sort.Strings(strs) 288 j := 1 289 for i := 1; i < len(strs); i++ { 290 if strs[j-1] != strs[i] { 291 strs[j] = strs[i] 292 j++ 293 } 294 } 295 return strs[:j] 296 }