kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/storage/tools/triples/triples.go (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Binary triples implements a converter from an Entry stream to a stream of triples.
    18  //
    19  // Examples:
    20  //
    21  //	triples < entries > triples.nq
    22  //	triples entries > triples.nq.gz
    23  //	triples --graphstore path/to/gs > triples.nq.gz
    24  //	triples entries triples.nq
    25  //
    26  // Reference: http://en.wikipedia.org/wiki/N-Triples
    27  package main
    28  
    29  import (
    30  	"context"
    31  	"encoding/base64"
    32  	"flag"
    33  	"fmt"
    34  	"io"
    35  	"os"
    36  
    37  	"kythe.io/kythe/go/platform/vfs"
    38  	"kythe.io/kythe/go/services/graphstore"
    39  	"kythe.io/kythe/go/storage/gsutil"
    40  	"kythe.io/kythe/go/storage/stream"
    41  	"kythe.io/kythe/go/util/encoding/rdf"
    42  	"kythe.io/kythe/go/util/flagutil"
    43  	"kythe.io/kythe/go/util/kytheuri"
    44  	"kythe.io/kythe/go/util/log"
    45  	"kythe.io/kythe/go/util/schema/edges"
    46  	"kythe.io/kythe/go/util/schema/facts"
    47  
    48  	spb "kythe.io/kythe/proto/storage_go_proto"
    49  
    50  	_ "kythe.io/kythe/go/services/graphstore/proxy"
    51  	_ "kythe.io/kythe/go/storage/leveldb"
    52  )
    53  
    54  var (
    55  	keepReverseEdges = flag.Bool("keep_reverse_edges", false, "Do not filter reverse edges from triples output")
    56  	quiet            = flag.Bool("quiet", false, "Do not emit logging messages")
    57  
    58  	gs graphstore.Service
    59  )
    60  
    61  func init() {
    62  	gsutil.Flag(&gs, "graphstore", "Path to GraphStore to convert to triples (instead of an entry stream)")
    63  	flag.Usage = flagutil.SimpleUsage("Converts an Entry stream to a stream of triples",
    64  		"[(--graphstore path | entries_file) [triples_out]]")
    65  }
    66  
    67  func main() {
    68  	flag.Parse()
    69  
    70  	if len(flag.Args()) > 2 || (gs != nil && len(flag.Args()) > 1) {
    71  		fmt.Fprintf(os.Stderr, "ERROR: too many arguments %v\n", flag.Args())
    72  		flag.Usage()
    73  		os.Exit(1)
    74  	}
    75  
    76  	if gs != nil {
    77  		defer gsutil.LogClose(context.Background(), gs)
    78  	}
    79  
    80  	var in io.ReadCloser = os.Stdin
    81  	if gs == nil && len(flag.Args()) > 0 {
    82  		file, err := vfs.Open(context.Background(), flag.Arg(0))
    83  		if err != nil {
    84  			log.Fatalf("Failed to open input file %q: %v", flag.Arg(0), err)
    85  		}
    86  		defer file.Close()
    87  		in = file
    88  	}
    89  
    90  	outIdx := 1
    91  	if gs != nil {
    92  		outIdx = 0
    93  	}
    94  
    95  	var out io.WriteCloser = os.Stdout
    96  	if len(flag.Args()) > outIdx {
    97  		file, err := vfs.Create(context.Background(), flag.Arg(outIdx))
    98  		if err != nil {
    99  			log.Fatalf("Failed to create output file %q: %v", flag.Arg(outIdx), err)
   100  		}
   101  		defer file.Close()
   102  		out = file
   103  	}
   104  
   105  	var (
   106  		entries      <-chan *spb.Entry
   107  		reverseEdges int
   108  		triples      int
   109  	)
   110  
   111  	if gs == nil {
   112  		entries = stream.ReadEntries(in)
   113  	} else {
   114  		ch := make(chan *spb.Entry)
   115  		entries = ch
   116  		go func() {
   117  			defer close(ch)
   118  			if err := gs.Scan(context.Background(), &spb.ScanRequest{}, func(e *spb.Entry) error {
   119  				ch <- e
   120  				return nil
   121  			}); err != nil {
   122  				log.Fatalf("Error scanning graphstore: %v", err)
   123  			}
   124  		}()
   125  	}
   126  
   127  	for entry := range entries {
   128  		if edges.IsReverse(entry.EdgeKind) && !*keepReverseEdges {
   129  			reverseEdges++
   130  			continue
   131  		}
   132  
   133  		t, err := toTriple(entry)
   134  		if err != nil {
   135  			log.Fatal(err)
   136  		}
   137  		fmt.Fprintln(out, t)
   138  		triples++
   139  	}
   140  
   141  	if !*quiet {
   142  		if !*keepReverseEdges {
   143  			log.Infof("Skipped %d reverse edges", reverseEdges)
   144  		}
   145  		log.Infof("Wrote %d triples", triples)
   146  	}
   147  }
   148  
   149  // toTriple converts an Entry to the triple file format. Returns an error if
   150  // the entry is not valid.
   151  func toTriple(entry *spb.Entry) (*rdf.Triple, error) {
   152  	if err := graphstore.ValidEntry(entry); err != nil {
   153  		return nil, fmt.Errorf("invalid entry {%+v}: %v", entry, err)
   154  	}
   155  
   156  	t := &rdf.Triple{
   157  		Subject: kytheuri.FromVName(entry.Source).String(),
   158  	}
   159  	if graphstore.IsEdge(entry) {
   160  		t.Predicate = entry.EdgeKind
   161  		t.Object = kytheuri.FromVName(entry.Target).String()
   162  	} else if entry.FactName == facts.Code {
   163  		t.Predicate = entry.FactName
   164  		t.Object = base64.StdEncoding.EncodeToString(entry.FactValue)
   165  	} else {
   166  		t.Predicate = entry.FactName
   167  		t.Object = string(entry.FactValue)
   168  	}
   169  	return t, nil
   170  }