kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/storage/tools/read_entries/read_entries.go (about) 1 /* 2 * Copyright 2014 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Binary read_entries scans the entries from a specified GraphStore and emits 18 // them to stdout as a delimited stream. 19 package main 20 21 import ( 22 "context" 23 "flag" 24 "fmt" 25 "os" 26 "sync" 27 28 "kythe.io/kythe/go/platform/delimited" 29 "kythe.io/kythe/go/platform/vfs" 30 "kythe.io/kythe/go/services/graphstore" 31 "kythe.io/kythe/go/storage/gsutil" 32 "kythe.io/kythe/go/util/flagutil" 33 "kythe.io/kythe/go/util/kytheuri" 34 "kythe.io/kythe/go/util/log" 35 36 spb "kythe.io/kythe/proto/storage_go_proto" 37 38 _ "kythe.io/kythe/go/services/graphstore/proxy" 39 _ "kythe.io/kythe/go/storage/leveldb" 40 ) 41 42 var ( 43 gs graphstore.Service 44 45 count = flag.Bool("count", false, "Only print the number of entries scanned") 46 47 shardsToFiles = flag.String("sharded_file", "", "If given, scan the entire GraphStore, storing each shard in a separate file instead of stdout (requires --shards)") 48 shardIndex = flag.Int64("shard_index", 0, "Index of a single shard to emit (requires --shards)") 49 shards = flag.Int64("shards", 0, "Number of shards to split the GraphStore") 50 51 edgeKind = flag.String("edge_kind", "", "Edge kind by which to filter a read/scan") 52 targetTicket = flag.String("target", "", "Ticket of target by which to filter a scan") 53 factPrefix = flag.String("fact_prefix", "", "Fact prefix by which to filter a scan") 54 ) 55 56 func init() { 57 gsutil.Flag(&gs, "graphstore", "GraphStore to read") 58 flag.Usage = flagutil.SimpleUsage("Scans/reads the entries from a GraphStore, emitting a delimited entry stream to stdout", 59 "--graphstore spec [--count] [--shards N [--shard_index I] --sharded_file path] [--edge_kind] ([--fact_prefix str] [--target ticket] | [ticket...])") 60 } 61 62 func main() { 63 flag.Parse() 64 if gs == nil { 65 flagutil.UsageError("missing --graphstore") 66 } else if *shardsToFiles != "" && *shards <= 0 { 67 flagutil.UsageError("--sharded_file and --shards must be given together") 68 } else if *shards > 0 && len(flag.Args()) > 0 { 69 flagutil.UsageError("--shards and giving tickets for reads are mutually exclusive") 70 } 71 72 ctx := context.Background() 73 74 wr := delimited.NewWriter(os.Stdout) 75 var total int64 76 if *shards <= 0 { 77 entryFunc := func(entry *spb.Entry) error { 78 if *count { 79 total++ 80 return nil 81 } 82 return wr.PutProto(entry) 83 } 84 if len(flag.Args()) > 0 { 85 if *targetTicket != "" || *factPrefix != "" { 86 log.Fatal("--target and --fact_prefix are unsupported when given tickets") 87 } 88 if err := readEntries(ctx, gs, entryFunc, *edgeKind, flag.Args()); err != nil { 89 log.Fatal(err) 90 } 91 } else { 92 if err := scanEntries(ctx, gs, entryFunc, *edgeKind, *targetTicket, *factPrefix); err != nil { 93 log.Fatal(err) 94 } 95 } 96 if *count { 97 fmt.Println(total) 98 } 99 return 100 } 101 102 sgs, ok := gs.(graphstore.Sharded) 103 if !ok { 104 log.Fatalf("Sharding unsupported for given GraphStore type: %T", gs) 105 } else if *shardIndex >= *shards { 106 log.Fatalf("Invalid shard index for %d shards: %d", *shards, *shardIndex) 107 } 108 109 if *count { 110 cnt, err := sgs.Count(ctx, &spb.CountRequest{Index: *shardIndex, Shards: *shards}) 111 if err != nil { 112 log.Fatalf("ERROR: %v", err) 113 } 114 fmt.Println(cnt) 115 return 116 } else if *shardsToFiles != "" { 117 var wg sync.WaitGroup 118 wg.Add(int(*shards)) 119 for i := int64(0); i < *shards; i++ { 120 go func(i int64) { 121 defer wg.Done() 122 path := fmt.Sprintf("%s-%.5d-of-%.5d", *shardsToFiles, i, *shards) 123 f, err := vfs.Create(ctx, path) 124 if err != nil { 125 log.Fatalf("Failed to create file %q: %v", path, err) 126 } 127 defer f.Close() 128 wr := delimited.NewWriter(f) 129 if err := sgs.Shard(ctx, &spb.ShardRequest{ 130 Index: i, 131 Shards: *shards, 132 }, func(entry *spb.Entry) error { 133 return wr.PutProto(entry) 134 }); err != nil { 135 log.Fatalf("GraphStore shard scan error: %v", err) 136 } 137 }(i) 138 } 139 wg.Wait() 140 return 141 } 142 143 if err := sgs.Shard(ctx, &spb.ShardRequest{ 144 Index: *shardIndex, 145 Shards: *shards, 146 }, func(entry *spb.Entry) error { 147 return wr.PutProto(entry) 148 }); err != nil { 149 log.Fatalf("GraphStore shard scan error: %v", err) 150 } 151 } 152 153 func readEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind string, tickets []string) error { 154 for _, ticket := range tickets { 155 src, err := kytheuri.ToVName(ticket) 156 if err != nil { 157 return fmt.Errorf("error parsing ticket %q: %v", ticket, err) 158 } 159 if err := gs.Read(ctx, &spb.ReadRequest{ 160 Source: src, 161 EdgeKind: edgeKind, 162 }, entryFunc); err != nil { 163 return fmt.Errorf("GraphStore Read error for ticket %q: %v", ticket, err) 164 } 165 } 166 return nil 167 } 168 169 func scanEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind, targetTicket, factPrefix string) error { 170 var target *spb.VName 171 var err error 172 if targetTicket != "" { 173 target, err = kytheuri.ToVName(targetTicket) 174 if err != nil { 175 return fmt.Errorf("error parsing --target %q: %v", targetTicket, err) 176 } 177 } 178 if err := gs.Scan(ctx, &spb.ScanRequest{ 179 EdgeKind: edgeKind, 180 FactPrefix: factPrefix, 181 Target: target, 182 }, entryFunc); err != nil { 183 return fmt.Errorf("GraphStore Scan error: %v", err) 184 } 185 return nil 186 }