kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/storage/tools/read_entries/read_entries.go (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Binary read_entries scans the entries from a specified GraphStore and emits
    18  // them to stdout as a delimited stream.
    19  package main
    20  
    21  import (
    22  	"context"
    23  	"flag"
    24  	"fmt"
    25  	"os"
    26  	"sync"
    27  
    28  	"kythe.io/kythe/go/platform/delimited"
    29  	"kythe.io/kythe/go/platform/vfs"
    30  	"kythe.io/kythe/go/services/graphstore"
    31  	"kythe.io/kythe/go/storage/gsutil"
    32  	"kythe.io/kythe/go/util/flagutil"
    33  	"kythe.io/kythe/go/util/kytheuri"
    34  	"kythe.io/kythe/go/util/log"
    35  
    36  	spb "kythe.io/kythe/proto/storage_go_proto"
    37  
    38  	_ "kythe.io/kythe/go/services/graphstore/proxy"
    39  	_ "kythe.io/kythe/go/storage/leveldb"
    40  )
    41  
    42  var (
    43  	gs graphstore.Service
    44  
    45  	count = flag.Bool("count", false, "Only print the number of entries scanned")
    46  
    47  	shardsToFiles = flag.String("sharded_file", "", "If given, scan the entire GraphStore, storing each shard in a separate file instead of stdout (requires --shards)")
    48  	shardIndex    = flag.Int64("shard_index", 0, "Index of a single shard to emit (requires --shards)")
    49  	shards        = flag.Int64("shards", 0, "Number of shards to split the GraphStore")
    50  
    51  	edgeKind     = flag.String("edge_kind", "", "Edge kind by which to filter a read/scan")
    52  	targetTicket = flag.String("target", "", "Ticket of target by which to filter a scan")
    53  	factPrefix   = flag.String("fact_prefix", "", "Fact prefix by which to filter a scan")
    54  )
    55  
    56  func init() {
    57  	gsutil.Flag(&gs, "graphstore", "GraphStore to read")
    58  	flag.Usage = flagutil.SimpleUsage("Scans/reads the entries from a GraphStore, emitting a delimited entry stream to stdout",
    59  		"--graphstore spec [--count] [--shards N [--shard_index I] --sharded_file path] [--edge_kind] ([--fact_prefix str] [--target ticket] | [ticket...])")
    60  }
    61  
    62  func main() {
    63  	flag.Parse()
    64  	if gs == nil {
    65  		flagutil.UsageError("missing --graphstore")
    66  	} else if *shardsToFiles != "" && *shards <= 0 {
    67  		flagutil.UsageError("--sharded_file and --shards must be given together")
    68  	} else if *shards > 0 && len(flag.Args()) > 0 {
    69  		flagutil.UsageError("--shards and giving tickets for reads are mutually exclusive")
    70  	}
    71  
    72  	ctx := context.Background()
    73  
    74  	wr := delimited.NewWriter(os.Stdout)
    75  	var total int64
    76  	if *shards <= 0 {
    77  		entryFunc := func(entry *spb.Entry) error {
    78  			if *count {
    79  				total++
    80  				return nil
    81  			}
    82  			return wr.PutProto(entry)
    83  		}
    84  		if len(flag.Args()) > 0 {
    85  			if *targetTicket != "" || *factPrefix != "" {
    86  				log.Fatal("--target and --fact_prefix are unsupported when given tickets")
    87  			}
    88  			if err := readEntries(ctx, gs, entryFunc, *edgeKind, flag.Args()); err != nil {
    89  				log.Fatal(err)
    90  			}
    91  		} else {
    92  			if err := scanEntries(ctx, gs, entryFunc, *edgeKind, *targetTicket, *factPrefix); err != nil {
    93  				log.Fatal(err)
    94  			}
    95  		}
    96  		if *count {
    97  			fmt.Println(total)
    98  		}
    99  		return
   100  	}
   101  
   102  	sgs, ok := gs.(graphstore.Sharded)
   103  	if !ok {
   104  		log.Fatalf("Sharding unsupported for given GraphStore type: %T", gs)
   105  	} else if *shardIndex >= *shards {
   106  		log.Fatalf("Invalid shard index for %d shards: %d", *shards, *shardIndex)
   107  	}
   108  
   109  	if *count {
   110  		cnt, err := sgs.Count(ctx, &spb.CountRequest{Index: *shardIndex, Shards: *shards})
   111  		if err != nil {
   112  			log.Fatalf("ERROR: %v", err)
   113  		}
   114  		fmt.Println(cnt)
   115  		return
   116  	} else if *shardsToFiles != "" {
   117  		var wg sync.WaitGroup
   118  		wg.Add(int(*shards))
   119  		for i := int64(0); i < *shards; i++ {
   120  			go func(i int64) {
   121  				defer wg.Done()
   122  				path := fmt.Sprintf("%s-%.5d-of-%.5d", *shardsToFiles, i, *shards)
   123  				f, err := vfs.Create(ctx, path)
   124  				if err != nil {
   125  					log.Fatalf("Failed to create file %q: %v", path, err)
   126  				}
   127  				defer f.Close()
   128  				wr := delimited.NewWriter(f)
   129  				if err := sgs.Shard(ctx, &spb.ShardRequest{
   130  					Index:  i,
   131  					Shards: *shards,
   132  				}, func(entry *spb.Entry) error {
   133  					return wr.PutProto(entry)
   134  				}); err != nil {
   135  					log.Fatalf("GraphStore shard scan error: %v", err)
   136  				}
   137  			}(i)
   138  		}
   139  		wg.Wait()
   140  		return
   141  	}
   142  
   143  	if err := sgs.Shard(ctx, &spb.ShardRequest{
   144  		Index:  *shardIndex,
   145  		Shards: *shards,
   146  	}, func(entry *spb.Entry) error {
   147  		return wr.PutProto(entry)
   148  	}); err != nil {
   149  		log.Fatalf("GraphStore shard scan error: %v", err)
   150  	}
   151  }
   152  
   153  func readEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind string, tickets []string) error {
   154  	for _, ticket := range tickets {
   155  		src, err := kytheuri.ToVName(ticket)
   156  		if err != nil {
   157  			return fmt.Errorf("error parsing ticket %q: %v", ticket, err)
   158  		}
   159  		if err := gs.Read(ctx, &spb.ReadRequest{
   160  			Source:   src,
   161  			EdgeKind: edgeKind,
   162  		}, entryFunc); err != nil {
   163  			return fmt.Errorf("GraphStore Read error for ticket %q: %v", ticket, err)
   164  		}
   165  	}
   166  	return nil
   167  }
   168  
   169  func scanEntries(ctx context.Context, gs graphstore.Service, entryFunc graphstore.EntryFunc, edgeKind, targetTicket, factPrefix string) error {
   170  	var target *spb.VName
   171  	var err error
   172  	if targetTicket != "" {
   173  		target, err = kytheuri.ToVName(targetTicket)
   174  		if err != nil {
   175  			return fmt.Errorf("error parsing --target %q: %v", targetTicket, err)
   176  		}
   177  	}
   178  	if err := gs.Scan(ctx, &spb.ScanRequest{
   179  		EdgeKind:   edgeKind,
   180  		FactPrefix: factPrefix,
   181  		Target:     target,
   182  	}, entryFunc); err != nil {
   183  		return fmt.Errorf("GraphStore Scan error: %v", err)
   184  	}
   185  	return nil
   186  }