kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/storage/tools/triples/triples.go (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Binary triples implements a converter from an Entry stream to a stream of triples. 18 // 19 // Examples: 20 // 21 // triples < entries > triples.nq 22 // triples entries > triples.nq.gz 23 // triples --graphstore path/to/gs > triples.nq.gz 24 // triples entries triples.nq 25 // 26 // Reference: http://en.wikipedia.org/wiki/N-Triples 27 package main 28 29 import ( 30 "context" 31 "encoding/base64" 32 "flag" 33 "fmt" 34 "io" 35 "os" 36 37 "kythe.io/kythe/go/platform/vfs" 38 "kythe.io/kythe/go/services/graphstore" 39 "kythe.io/kythe/go/storage/gsutil" 40 "kythe.io/kythe/go/storage/stream" 41 "kythe.io/kythe/go/util/encoding/rdf" 42 "kythe.io/kythe/go/util/flagutil" 43 "kythe.io/kythe/go/util/kytheuri" 44 "kythe.io/kythe/go/util/log" 45 "kythe.io/kythe/go/util/schema/edges" 46 "kythe.io/kythe/go/util/schema/facts" 47 48 spb "kythe.io/kythe/proto/storage_go_proto" 49 50 _ "kythe.io/kythe/go/services/graphstore/proxy" 51 _ "kythe.io/kythe/go/storage/leveldb" 52 ) 53 54 var ( 55 keepReverseEdges = flag.Bool("keep_reverse_edges", false, "Do not filter reverse edges from triples output") 56 quiet = flag.Bool("quiet", false, "Do not emit logging messages") 57 58 gs graphstore.Service 59 ) 60 61 func init() { 62 gsutil.Flag(&gs, "graphstore", "Path to GraphStore to convert to triples (instead of an entry stream)") 63 flag.Usage = flagutil.SimpleUsage("Converts an Entry stream to a stream of triples", 64 "[(--graphstore path | entries_file) [triples_out]]") 65 } 66 67 func main() { 68 flag.Parse() 69 70 if len(flag.Args()) > 2 || (gs != nil && len(flag.Args()) > 1) { 71 fmt.Fprintf(os.Stderr, "ERROR: too many arguments %v\n", flag.Args()) 72 flag.Usage() 73 os.Exit(1) 74 } 75 76 if gs != nil { 77 defer gsutil.LogClose(context.Background(), gs) 78 } 79 80 var in io.ReadCloser = os.Stdin 81 if gs == nil && len(flag.Args()) > 0 { 82 file, err := vfs.Open(context.Background(), flag.Arg(0)) 83 if err != nil { 84 log.Fatalf("Failed to open input file %q: %v", flag.Arg(0), err) 85 } 86 defer file.Close() 87 in = file 88 } 89 90 outIdx := 1 91 if gs != nil { 92 outIdx = 0 93 } 94 95 var out io.WriteCloser = os.Stdout 96 if len(flag.Args()) > outIdx { 97 file, err := vfs.Create(context.Background(), flag.Arg(outIdx)) 98 if err != nil { 99 log.Fatalf("Failed to create output file %q: %v", flag.Arg(outIdx), err) 100 } 101 defer file.Close() 102 out = file 103 } 104 105 var ( 106 entries <-chan *spb.Entry 107 reverseEdges int 108 triples int 109 ) 110 111 if gs == nil { 112 entries = stream.ReadEntries(in) 113 } else { 114 ch := make(chan *spb.Entry) 115 entries = ch 116 go func() { 117 defer close(ch) 118 if err := gs.Scan(context.Background(), &spb.ScanRequest{}, func(e *spb.Entry) error { 119 ch <- e 120 return nil 121 }); err != nil { 122 log.Fatalf("Error scanning graphstore: %v", err) 123 } 124 }() 125 } 126 127 for entry := range entries { 128 if edges.IsReverse(entry.EdgeKind) && !*keepReverseEdges { 129 reverseEdges++ 130 continue 131 } 132 133 t, err := toTriple(entry) 134 if err != nil { 135 log.Fatal(err) 136 } 137 fmt.Fprintln(out, t) 138 triples++ 139 } 140 141 if !*quiet { 142 if !*keepReverseEdges { 143 log.Infof("Skipped %d reverse edges", reverseEdges) 144 } 145 log.Infof("Wrote %d triples", triples) 146 } 147 } 148 149 // toTriple converts an Entry to the triple file format. Returns an error if 150 // the entry is not valid. 151 func toTriple(entry *spb.Entry) (*rdf.Triple, error) { 152 if err := graphstore.ValidEntry(entry); err != nil { 153 return nil, fmt.Errorf("invalid entry {%+v}: %v", entry, err) 154 } 155 156 t := &rdf.Triple{ 157 Subject: kytheuri.FromVName(entry.Source).String(), 158 } 159 if graphstore.IsEdge(entry) { 160 t.Predicate = entry.EdgeKind 161 t.Object = kytheuri.FromVName(entry.Target).String() 162 } else if entry.FactName == facts.Code { 163 t.Predicate = entry.FactName 164 t.Object = base64.StdEncoding.EncodeToString(entry.FactValue) 165 } else { 166 t.Predicate = entry.FactName 167 t.Object = string(entry.FactValue) 168 } 169 return t, nil 170 }