github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/live/run.go

     1  /*
     2   * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package live
    18  
    19  import (
    20  	"bufio"
    21  	"compress/gzip"
    22  	"context"
    23  	"crypto/tls"
    24  	"encoding/json"
    25  	"fmt"
    26  	"io"
    27  	"math"
    28  	"math/rand"
    29  	"net/http"
    30  	_ "net/http/pprof" // http profiler
    31  	"os"
    32  	"sort"
    33  	"strconv"
    34  	"strings"
    35  	"time"
    36  
    37  	"github.com/dgryski/go-farm"
    38  	"github.com/golang/glog"
    39  	"github.com/pkg/errors"
    40  	"github.com/spf13/cobra"
    41  	"github.com/spf13/viper"
    42  	"google.golang.org/grpc"
    43  	"google.golang.org/grpc/metadata"
    44  
    45  	"github.com/dgraph-io/badger/v3"
    46  	bopt "github.com/dgraph-io/badger/v3/options"
    47  	"github.com/dgraph-io/dgo/v210"
    48  	"github.com/dgraph-io/dgo/v210/protos/api"
    49  	"github.com/dgraph-io/dgraph/chunker"
    50  	"github.com/dgraph-io/dgraph/ee"
    51  	"github.com/dgraph-io/dgraph/ee/enc"
    52  	"github.com/dgraph-io/dgraph/filestore"
    53  	schemapkg "github.com/dgraph-io/dgraph/schema"
    54  	"github.com/dgraph-io/dgraph/types"
    55  	"github.com/dgraph-io/dgraph/x"
    56  	"github.com/dgraph-io/dgraph/xidmap"
    57  	"github.com/dgraph-io/ristretto/z"
    58  )
    59  
    60  type options struct {
    61  	dataFiles       string
    62  	dataFormat      string
    63  	schemaFile      string
    64  	zero            string
    65  	concurrent      int
    66  	batchSize       int
    67  	clientDir       string
    68  	authToken       string
    69  	useCompression  bool
    70  	newUids         bool
    71  	verbose         bool
    72  	httpAddr        string
    73  	bufferSize      int
    74  	upsertPredicate string
    75  	tmpDir          string
    76  	key             x.Sensitive
    77  	namespaceToLoad uint64
    78  	preserveNs      bool
    79  }
    80  
    81  type predicate struct {
    82  	Predicate  string   `json:"predicate,omitempty"`
    83  	Type       string   `json:"type,omitempty"`
    84  	Tokenizer  []string `json:"tokenizer,omitempty"`
    85  	Count      bool     `json:"count,omitempty"`
    86  	List       bool     `json:"list,omitempty"`
    87  	Lang       bool     `json:"lang,omitempty"`
    88  	Index      bool     `json:"index,omitempty"`
    89  	Upsert     bool     `json:"upsert,omitempty"`
    90  	Reverse    bool     `json:"reverse,omitempty"`
    91  	NoConflict bool     `json:"no_conflict,omitempty"`
    92  	ValueType  types.TypeID
    93  }
    94  
    95  type schema struct {
    96  	Predicates []*predicate `json:"schema,omitempty"`
    97  	preds      map[string]*predicate
    98  }
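
// Illustrative sketch (not part of the original source; the predicate name and options are
// hypothetical): the JSON returned by a `schema {}` query unmarshals into the structs above
// roughly as follows.
//
//	{
//	  "schema": [
//	    {"predicate": "xid", "type": "string", "index": true, "tokenizer": ["exact"], "upsert": true}
//	  ]
//	}
//
// After json.Unmarshal, schema.Predicates holds one *predicate (Predicate="xid",
// Type="string"), and schema.init then keys the preds map by the (optionally namespaced)
// predicate name.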
    99  
   100  type request struct {
   101  	*api.Mutation
   102  	conflicts []uint64
   103  }
   104  
   105  func (l *schema) init(ns uint64, galaxyOperation bool) {
   106  	l.preds = make(map[string]*predicate)
   107  	for _, i := range l.Predicates {
   108  		i.ValueType, _ = types.TypeForName(i.Type)
   109  		if !galaxyOperation {
   110  			i.Predicate = x.NamespaceAttr(ns, i.Predicate)
   111  		}
   112  		l.preds[i.Predicate] = i
   113  	}
   114  }
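
// A minimal usage sketch for init (the namespace value is hypothetical, for illustration only):
//
//	var s schema
//	_ = json.Unmarshal(res.GetJson(), &s)
//	// For a non-galaxy operation, predicates are stored under namespaced keys:
//	s.init(2, false) // s.preds is keyed by x.NamespaceAttr(2, <predicate>)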
   115  
   116  var (
   117  	opt options
   118  	sch schema
   119  
   120  	// Live is the sub-command invoked when running "dgraph live".
   121  	Live x.SubCommand
   122  )
   123  
   124  func init() {
   125  	Live.Cmd = &cobra.Command{
   126  		Use:   "live",
   127  		Short: "Run Dgraph Live Loader",
   128  		Run: func(cmd *cobra.Command, args []string) {
   129  			defer x.StartProfile(Live.Conf).Stop()
   130  			if err := run(); err != nil {
   131  				x.Check2(fmt.Fprintf(os.Stderr, "%s", err.Error()))
   132  				os.Exit(1)
   133  			}
   134  		},
   135  		Annotations: map[string]string{"group": "data-load"},
   136  	}
   137  	Live.EnvPrefix = "DGRAPH_LIVE"
   138  	Live.Cmd.SetHelpTemplate(x.NonRootTemplate)
   139  
   140  	flag := Live.Cmd.Flags()
   141  	// --vault SuperFlag and encryption flags
   142  	ee.RegisterEncFlag(flag)
   143  	// --tls SuperFlag
   144  	x.RegisterClientTLSFlags(flag)
   145  
   146  	flag.StringP("files", "f", "", "Location of *.rdf(.gz) or *.json(.gz) file(s) to load")
   147  	flag.StringP("schema", "s", "", "Location of schema file")
   148  	flag.String("format", "", "Specify file format (rdf or json) instead of getting it "+
   149  		"from filename")
   150  	flag.StringP("alpha", "a", "127.0.0.1:9080",
   151  		"Comma-separated list of Dgraph alpha gRPC server addresses")
   152  	flag.StringP("zero", "z", "127.0.0.1:5080", "Dgraph zero gRPC server address")
   153  	flag.IntP("conc", "c", 10,
   154  		"Number of concurrent requests to make to Dgraph")
   155  	flag.IntP("batch", "b", 1000,
   156  		"Number of N-Quads to send as part of a mutation.")
   157  	flag.StringP("xidmap", "x", "", "Directory to store xid to uid mapping")
   158  	flag.StringP("auth_token", "t", "",
   159  		"The auth token passed to the server for Alter operation of the schema file. "+
   160  			"If used with --slash_grpc_endpoint, then this should be set to the API token issued "+
   161  			"by Slash GraphQL")
   162  	flag.String("slash_grpc_endpoint", "", "Path to Slash GraphQL GRPC endpoint. "+
   163  		"If --slash_grpc_endpoint is set, all other TLS options and connection options will be "+
   164  		"ignored")
   165  	flag.BoolP("use_compression", "C", false,
   166  		"Enable compression on connection to alpha server")
   167  	flag.Bool("new_uids", false,
   168  		"Ignore UIDs in load files and assign new ones.")
   169  	flag.String("http", "localhost:6060", "Address to serve http (pprof).")
   170  	flag.Bool("verbose", false, "Run the live loader in verbose mode")
   171  
   172  	flag.String("creds", "",
   173  		`Various login credentials if login is required.
   174  	user defines the username to log in.
   175  	password defines the password of the user.
   176  	namespace defines the namespace to log into.
   177  	Sample flag could look like --creds user=username;password=mypass;namespace=2`)
   178  
   179  	flag.StringP("bufferSize", "m", "100", "Buffer for each thread")
   180  	flag.StringP("upsertPredicate", "U", "", "Run in upsertPredicate mode. The value would "+
   181  		"be used to store blank nodes as an xid")
   182  	flag.String("tmp", "t", "Directory to store temporary buffers.")
   183  	flag.Int64("force-namespace", 0, "Namespace onto which to load the data. "+
   184  		"Only a guardian of the galaxy should use this for loading data into multiple namespaces "+
   185  		"or some specific namespace. Setting it to a negative value will preserve the namespace.")
   186  }
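
// A typical invocation wiring the flags above together (addresses, paths, and credentials are
// hypothetical, shown only to illustrate how the flags combine):
//
//	dgraph live -f data.rdf.gz -s data.schema -a localhost:9080 -z localhost:5080 \
//	  -c 10 -b 1000 -x ./xidmap --creds "user=groot;password=password;namespace=0"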
   187  
   188  func getSchema(ctx context.Context, dgraphClient *dgo.Dgraph, galaxyOperation bool) (*schema, error) {
   189  	txn := dgraphClient.NewTxn()
   190  	defer func() {
   191  		if err := txn.Discard(ctx); err != nil {
   192  			glog.Warningf("error in discarding txn: %v", err)
   193  		}
   194  	}()
   195  
   196  	res, err := txn.Query(ctx, "schema {}")
   197  	if err != nil {
   198  		return nil, err
   199  	}
   200  
   201  	err = json.Unmarshal(res.GetJson(), &sch)
   202  	if err != nil {
   203  		return nil, err
   204  	}
   205  	// If we are not loading data across namespaces, the schema query result will not contain the
   206  	// namespace information. Set it inside the init function.
   207  	sch.init(opt.namespaceToLoad, galaxyOperation)
   208  	return &sch, nil
   209  }
   210  
   211  // validateSchema checks that the namespace of every predicate and type in the schema exists.
   212  func validateSchema(sch string, namespaces map[uint64]struct{}) error {
   213  	result, err := schemapkg.Parse(sch)
   214  	if err != nil {
   215  		return err
   216  	}
   217  	for _, pred := range result.Preds {
   218  		ns := x.ParseNamespace(pred.Predicate)
   219  		if _, ok := namespaces[ns]; !ok {
   220  			return errors.Errorf("Namespace %#x doesn't exist for pred %s.", ns, pred.Predicate)
   221  		}
   222  	}
   223  	for _, typ := range result.Types {
   224  		ns := x.ParseNamespace(typ.TypeName)
   225  		if _, ok := namespaces[ns]; !ok {
   226  			return errors.Errorf("Namespace %#x doesn't exist for type %s.", ns, typ.TypeName)
   227  		}
   228  	}
   229  	return nil
   230  }
   231  
   232  // processSchemaFile processes the schema from the given file (which may be gzip-compressed).
   233  func (l *loader) processSchemaFile(ctx context.Context, file string, key x.Sensitive,
   234  	dgraphClient *dgo.Dgraph) error {
   235  	fmt.Printf("\nProcessing schema file %q\n", file)
   236  	if len(opt.authToken) > 0 {
   237  		md := metadata.New(nil)
   238  		md.Append("auth-token", opt.authToken)
   239  		ctx = metadata.NewOutgoingContext(ctx, md)
   240  	}
   241  
   242  	f, err := filestore.Open(file)
   243  	x.CheckfNoTrace(err)
   244  	defer f.Close()
   245  
   246  	reader, err := enc.GetReader(key, f)
   247  	x.Check(err)
   248  	if strings.HasSuffix(strings.ToLower(file), ".gz") {
   249  		reader, err = gzip.NewReader(reader)
   250  		x.Check(err)
   251  	}
   252  
   253  	b, err := io.ReadAll(reader)
   254  	if err != nil {
   255  		x.Checkf(err, "Error while reading file")
   256  	}
   257  
   258  	op := &api.Operation{}
   259  	op.Schema = string(b)
   260  	if opt.preserveNs {
   261  		// Verify the schema if we are loading into multiple namespaces.
   262  		if err := validateSchema(op.Schema, l.namespaces); err != nil {
   263  			return err
   264  		}
   265  	}
   266  	return dgraphClient.Alter(ctx, op)
   267  }
   268  
   269  func (l *loader) uid(val string, ns uint64) string {
   270  	// Attempt to parse as a UID (in the same format that dgraph outputs - a
   271  	// hex number prefixed by "0x"). If parsing succeeds, then this is assumed
   272  	// to be an existing node in the graph. There is limited protection against
   273  	// a user selecting an unassigned UID in this way - it may be assigned
   274  	// later to another node. It is up to the user to avoid this.
   275  	if !opt.newUids {
   276  		if uid, err := strconv.ParseUint(val, 0, 64); err == nil {
   277  			return fmt.Sprintf("%#x", uid)
   278  		}
   279  	}
   280  
   281  	// TODO(Naman): Do we still need this here? The xidmap, which uses a btree, does not keep
   282  	// hold of this string.
   283  	sb := strings.Builder{}
   284  	x.Check2(sb.WriteString(x.NamespaceAttr(ns, val)))
   285  	uid, _ := l.alloc.AssignUid(sb.String())
   286  
   287  	return fmt.Sprintf("%#x", uint64(uid))
   288  }
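
// For illustration (hypothetical values): with --new_uids unset, uid("0x2a", ns) returns
// "0x2a" as-is because the value parses as an existing UID, while uid("alice", ns) asks the
// xidmap allocator for a UID assigned to the namespaced key and returns it in "0x..." form.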
   289  
   290  func generateBlankNode(val string) string {
   291  	// generates "u_hash(val)"
   292  
   293  	sb := strings.Builder{}
   294  	x.Check2(sb.WriteString("u_"))
   295  	x.Check2(sb.WriteString(strconv.FormatUint(farm.Fingerprint64([]byte(val)), 10)))
   296  	return sb.String()
   297  }
   298  
   299  func generateUidFunc(val string) string {
   300  	// generates "uid(val)"
   301  
   302  	sb := strings.Builder{}
   303  	sb.WriteString("uid(")
   304  	sb.WriteString(val)
   305  	sb.WriteRune(')')
   306  	return sb.String()
   307  }
   308  
   309  func generateQuery(node, predicate, xid string) string {
   310  	// generates "node as node(func: eq(predicate, xid)) {uid}"
   311  
   312  	sb := strings.Builder{}
   313  	sb.WriteString(node)
   314  	sb.WriteString(" as ")
   315  	sb.WriteString(node)
   316  	sb.WriteString("(func: eq(")
   317  	sb.WriteString(predicate)
   318  	sb.WriteString(`, `)
   319  	sb.WriteString(strconv.Quote(xid))
   320  	sb.WriteString(`)) {uid}`)
   321  	return sb.String()
   322  }
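
// Example (hypothetical arguments): generateQuery("u_42", "xid", "m.1234") yields
//
//	u_42 as u_42(func: eq(xid, "m.1234")) {uid}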
   323  
   324  func (l *loader) upsertUids(nqs []*api.NQuad) {
   325  	// We form an upsertPredicate query for each of the IDs seen in the request, along with
   326  	// a mutation adding the corresponding xid to that uid. The mutation only has an effect if
   327  	// the uid doesn't already exist.
   328  	//
   329  	// Example upsertPredicate mutation:
   330  	//
   331  	// query {
   332  	//     u_1 as var(func: eq(xid, "m.1234"))
   333  	// }
   334  	//
   335  	// mutation {
   336  	//     set {
   337  	//          uid(u_1) xid m.1234 .
   338  	//     }
   339  	// }
   340  	l.upsertLock.Lock()
   341  	defer l.upsertLock.Unlock()
   342  
   343  	ids := make(map[string]string)
   344  
   345  	for _, nq := range nqs {
   346  		// Use a hash, since the value might contain invalid symbols.
   347  		subject := x.NamespaceAttr(nq.Namespace, nq.Subject)
   348  		ids[subject] = generateBlankNode(subject)
   349  
   350  		if len(nq.ObjectId) > 0 {
   351  			// Use a hash, since the value might contain invalid symbols.
   352  			object := x.NamespaceAttr(nq.Namespace, nq.ObjectId)
   353  			ids[object] = generateBlankNode(object)
   354  		}
   355  	}
   356  
   357  	mutations := make([]*api.NQuad, 0, len(ids))
   358  	query := strings.Builder{}
   359  	query.WriteString("query {")
   360  	query.WriteRune('\n')
   361  
   362  	for xid, idx := range ids {
   363  		if l.alloc.CheckUid(xid) {
   364  			continue
   365  		}
   366  
   367  		// Strip away the namespace from the query and mutation.
   368  		xid := x.ParseAttr(xid)
   369  		query.WriteString(generateQuery(idx, opt.upsertPredicate, xid))
   370  		query.WriteRune('\n')
   371  		mutations = append(mutations, &api.NQuad{
   372  			Subject:     generateUidFunc(idx),
   373  			Predicate:   opt.upsertPredicate,
   374  			ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: xid}},
   375  		})
   376  	}
   377  
   378  	if len(mutations) == 0 {
   379  		return
   380  	}
   381  
   382  	query.WriteRune('}')
   383  
   384  	// allocate all the new xids
   385  	resp, err := l.dc.NewTxn().Do(l.opts.Ctx, &api.Request{
   386  		CommitNow: true,
   387  		Query:     query.String(),
   388  		Mutations: []*api.Mutation{{Set: mutations}},
   389  	})
   390  
   391  	if err != nil {
   392  		panic(err)
   393  	}
   394  
   395  	type dResult struct {
   396  		Uid string
   397  	}
   398  
   399  	var result map[string][]dResult
   400  	err = json.Unmarshal(resp.GetJson(), &result)
   401  	if err != nil {
   402  		panic(err)
   403  	}
   404  
   405  	for xid, idx := range ids {
   406  		// The xid already exists in Dgraph.
   407  		if val, ok := result[idx]; ok && len(val) > 0 {
   408  			uid, err := strconv.ParseUint(val[0].Uid, 0, 64)
   409  			if err != nil {
   410  				panic(err)
   411  			}
   412  
   413  			l.alloc.SetUid(xid, uid)
   414  			continue
   415  		}
   416  
   417  		// A new uid was created in Dgraph.
   418  		if val, ok := resp.GetUids()[generateUidFunc(idx)]; ok {
   419  			uid, err := strconv.ParseUint(val, 0, 64)
   420  			if err != nil {
   421  				panic(err)
   422  			}
   423  
   424  			l.alloc.SetUid(xid, uid)
   425  			continue
   426  		}
   427  	}
   428  }
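
// Sketch of the response handling above (hypothetical values): if the query returned
//
//	{"u_42": [{"uid": "0x5"}]}
//
// then the xid behind blank node "u_42" already exists and 0x5 is recorded in the xidmap;
// otherwise the freshly created uid is read from resp.GetUids() under the key "uid(u_42)".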
   429  
   430  // allocateUids looks for the maximum uid value in the given NQuads and bumps the
   431  // maximum seen uid to that value.
   432  func (l *loader) allocateUids(nqs []*api.NQuad) {
   433  	if opt.newUids {
   434  		return
   435  	}
   436  
   437  	var maxUid uint64
   438  	for _, nq := range nqs {
   439  		sUid, err := strconv.ParseUint(nq.Subject, 0, 64)
   440  		if err != nil {
   441  			continue
   442  		}
   443  		if sUid > maxUid {
   444  			maxUid = sUid
   445  		}
   446  
   447  		oUid, err := strconv.ParseUint(nq.ObjectId, 0, 64)
   448  		if err != nil {
   449  			continue
   450  		}
   451  		if oUid > maxUid {
   452  			maxUid = oUid
   453  		}
   454  	}
   455  	l.alloc.BumpTo(maxUid)
   456  }
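
// For example (hypothetical nquads): subjects "0x10" and "0xff" with object id "0x2a" give
// maxUid = 0xff, so the allocator is bumped to 0xff and newly assigned uids won't collide
// with uids that already appear in the data.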
   457  
   458  // processFile forwards a file to the RDF or JSON processor as appropriate
   459  func (l *loader) processFile(ctx context.Context, fs filestore.FileStore, filename string,
   460  	key x.Sensitive) error {
   461  
   462  	fmt.Printf("Processing data file %q\n", filename)
   463  
   464  	rd, cleanup := fs.ChunkReader(filename, key)
   465  	defer cleanup()
   466  
   467  	loadType := chunker.DataFormat(filename, opt.dataFormat)
   468  	if loadType == chunker.UnknownFormat {
   469  		if isJson, err := chunker.IsJSONData(rd); err == nil {
   470  			if isJson {
   471  				loadType = chunker.JsonFormat
   472  			} else {
   473  				return errors.Errorf("need --format=rdf or --format=json to load %s", filename)
   474  			}
   475  		}
   476  	}
   477  
   478  	return l.processLoadFile(ctx, rd, chunker.NewChunker(loadType, opt.batchSize))
   479  }
   480  
   481  func (l *loader) processLoadFile(ctx context.Context, rd *bufio.Reader, ck chunker.Chunker) error {
   482  	nqbuf := ck.NQuads()
   483  	errCh := make(chan error, 1)
   484  	// Spin up a goroutine to push NQuads to the mutation channel.
   485  	go func() {
   486  		var err error
   487  		defer func() {
   488  			errCh <- err
   489  		}()
   490  		buffer := make([]*api.NQuad, 0, opt.bufferSize*opt.batchSize)
   491  
   492  		drain := func() {
   493  			// We collect opt.bufferSize requests and preprocess them. To keep the requests
   494  			// from conflicting with each other, we sort them on the basis of their predicates.
   495  			// Predicates with a count index will conflict among themselves, so we keep them at
   496  			// the end, making room for the other predicates to load quickly.
   497  			sort.Slice(buffer, func(i, j int) bool {
   498  				iPred := sch.preds[x.NamespaceAttr(buffer[i].Namespace, buffer[i].Predicate)]
   499  				jPred := sch.preds[x.NamespaceAttr(buffer[j].Namespace, buffer[j].Predicate)]
   500  				t := func(a *predicate) int {
   501  					if a != nil && a.Count {
   502  						return 1
   503  					}
   504  					return 0
   505  				}
   506  
   507  				// Sort the nquads on the basis of their predicates, keeping the
   508  				// predicates with a count index after those without one.
   509  				if t(iPred) != t(jPred) {
   510  					return t(iPred) < t(jPred)
   511  				}
   512  				return buffer[i].Predicate < buffer[j].Predicate
   513  			})
   514  			for len(buffer) > 0 {
   515  				sz := opt.batchSize
   516  				if len(buffer) < opt.batchSize {
   517  					sz = len(buffer)
   518  				}
   519  				mu := &request{Mutation: &api.Mutation{Set: buffer[:sz]}}
   520  				l.reqs <- mu
   521  				buffer = buffer[sz:]
   522  			}
   523  		}
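
		// Sketch of the resulting order (hypothetical predicates): if "friend" carries a count
		// index and "name" does not, the sort in drain places all "name" nquads before the
		// "friend" ones, so the count-indexed mutations, which conflict with each other, are
		// sent last.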
   524  
   525  		for nqs := range nqbuf.Ch() {
   526  			if len(nqs) == 0 {
   527  				continue
   528  			}
   529  
   530  			for _, nq := range nqs {
   531  				if !opt.preserveNs {
   532  					// If we are not preserving namespaces, use the namespace passed through
   533  					// the `--force-namespace` flag.
   534  					nq.Namespace = opt.namespaceToLoad
   535  				}
   536  				if _, ok := l.namespaces[nq.Namespace]; !ok {
   537  					err = errors.Errorf("Cannot load nquad:%+v as its namespace doesn't exist.", nq)
   538  					return
   539  				}
   540  			}
   541  
   542  			if opt.upsertPredicate == "" {
   543  				l.allocateUids(nqs)
   544  			} else {
   545  				// TODO(Naman): Handle this. upsertUids sends a single upsert block for multiple
   546  				// nquads. These nquads may belong to different namespaces, so alpha can't figure
   547  				// out how to process them.
   548  				// Currently, this option only works when loading data into the logged-in namespace.
   549  				// TODO(Naman): Add a test for a case when it works and when it doesn't.
   550  				l.upsertUids(nqs)
   551  			}
   552  
   553  			for _, nq := range nqs {
   554  				nq.Subject = l.uid(nq.Subject, nq.Namespace)
   555  				if len(nq.ObjectId) > 0 {
   556  					nq.ObjectId = l.uid(nq.ObjectId, nq.Namespace)
   557  				}
   558  			}
   559  
   560  			buffer = append(buffer, nqs...)
   561  			if len(buffer) < opt.bufferSize*opt.batchSize {
   562  				continue
   563  			}
   564  
   565  			drain()
   566  		}
   567  		drain()
   568  	}()
   569  
   570  	for {
   571  		select {
   572  		case <-ctx.Done():
   573  			return ctx.Err()
   574  		case err := <-errCh:
   575  			return err
   576  		default:
   577  		}
   578  
   579  		chunkBuf, err := ck.Chunk(rd)
   580  		// Parse the RDF entries from the chunk, group them into batches (each one
   581  		// containing opt.batchSize entries) and send the batches to the loader.reqs channel
   582  		// (see above).
   583  		if oerr := ck.Parse(chunkBuf); oerr != nil {
   584  			return errors.Wrap(oerr, "During parsing chunk in processLoadFile")
   585  		}
   586  		if err == io.EOF {
   587  			break
   588  		} else {
   589  			x.Check(err)
   590  		}
   591  	}
   592  	nqbuf.Flush()
   593  	return <-errCh
   594  }
   595  
   596  func setup(opts batchMutationOptions, dc *dgo.Dgraph, conf *viper.Viper) *loader {
   597  	var db *badger.DB
   598  	if len(opt.clientDir) > 0 {
   599  		x.Check(os.MkdirAll(opt.clientDir, 0700))
   600  
   601  		var err error
   602  		db, err = badger.Open(badger.DefaultOptions(opt.clientDir).
   603  			WithCompression(bopt.ZSTD).
   604  			WithSyncWrites(false).
   605  			WithBlockCacheSize(100 * (1 << 20)).
   606  			WithIndexCacheSize(100 * (1 << 20)).
   607  			WithZSTDCompressionLevel(3))
   608  		x.Checkf(err, "Error while creating badger KV posting store")
   609  
   610  	}
   611  
   612  	dialOpts := []grpc.DialOption{}
   613  	if conf.GetString("slash_grpc_endpoint") != "" && conf.IsSet("auth_token") {
   614  		dialOpts = append(dialOpts, x.WithAuthorizationCredentials(conf.GetString("auth_token")))
   615  	}
   616  
   617  	var tlsConfig *tls.Config
   618  	if conf.GetString("slash_grpc_endpoint") != "" {
   619  		var tlsErr error
   620  		tlsConfig, tlsErr = x.SlashTLSConfig(conf.GetString("slash_grpc_endpoint"))
   621  		x.Checkf(tlsErr, "Unable to generate TLS Cert Pool")
   622  	} else {
   623  		var tlsErr error
   624  		tlsConfig, tlsErr = x.LoadClientTLSConfigForInternalPort(conf)
   625  		x.Check(tlsErr)
   626  	}
   627  
   628  	// Compression with the zero server actually makes things worse.
   629  	connzero, err := x.SetupConnection(opt.zero, tlsConfig, false, dialOpts...)
   630  	x.Checkf(err, "Unable to connect to zero, Is it running at %s?", opt.zero)
   631  
   632  	xopts := xidmap.XidMapOptions{UidAssigner: connzero, DB: db}
   633  	// Slash uses alpha to assign UIDs in the live loader. The Dgraph client is needed by
   634  	// xidmap to do authorization.
   635  	xopts.DgClient = dc
   636  
   637  	alloc := xidmap.New(xopts)
   638  	l := &loader{
   639  		opts:       opts,
   640  		dc:         dc,
   641  		start:      time.Now(),
   642  		reqs:       make(chan *request, opts.Pending*2),
   643  		conflicts:  make(map[uint64]struct{}),
   644  		alloc:      alloc,
   645  		db:         db,
   646  		zeroconn:   connzero,
   647  		namespaces: make(map[uint64]struct{}),
   648  	}
   649  
   650  	l.requestsWg.Add(opts.Pending)
   651  	for i := 0; i < opts.Pending; i++ {
   652  		go l.makeRequests()
   653  	}
   654  
   655  	rand.Seed(time.Now().Unix())
   656  	return l
   657  }
   658  
   659  // populateNamespaces fetches the schema and extracts information about the existing namespaces.
   660  func (l *loader) populateNamespaces(ctx context.Context, dc *dgo.Dgraph, singleNsOp bool) error {
   661  	if singleNsOp {
   662  		// The schema query below returns the predicates without the namespace if the context
   663  		// does not have the galaxy operation set. Since we are not loading data across
   664  		// namespaces, the existence of the namespace is verified when the user logs in.
   665  		l.namespaces[opt.namespaceToLoad] = struct{}{}
   666  		return nil
   667  	}
   668  
   669  	txn := dc.NewTxn()
   670  	defer func() {
   671  		if err := txn.Discard(ctx); err != nil {
   672  			glog.Warningf("error in discarding txn: %v", err)
   673  		}
   674  	}()
   675  
   676  	res, err := txn.Query(ctx, "schema {}")
   677  	if err != nil {
   678  		return err
   679  	}
   680  
   681  	var sch schema
   682  	err = json.Unmarshal(res.GetJson(), &sch)
   683  	if err != nil {
   684  		return err
   685  	}
   686  
   687  	for _, pred := range sch.Predicates {
   688  		ns := x.ParseNamespace(pred.Predicate)
   689  		l.namespaces[ns] = struct{}{}
   690  	}
   691  	return nil
   692  }
   693  
   694  func run() error {
   695  	var zero string
   696  	if Live.Conf.GetString("slash_grpc_endpoint") != "" {
   697  		zero = Live.Conf.GetString("slash_grpc_endpoint")
   698  	} else {
   699  		zero = Live.Conf.GetString("zero")
   700  	}
   701  
   702  	creds := z.NewSuperFlag(Live.Conf.GetString("creds")).MergeAndCheckDefault(x.DefaultCreds)
   703  	keys, err := ee.GetKeys(Live.Conf)
   704  	if err != nil {
   705  		return err
   706  	}
   707  
   708  	x.PrintVersion()
   709  	opt = options{
   710  		dataFiles:       Live.Conf.GetString("files"),
   711  		dataFormat:      Live.Conf.GetString("format"),
   712  		schemaFile:      Live.Conf.GetString("schema"),
   713  		zero:            zero,
   714  		concurrent:      Live.Conf.GetInt("conc"),
   715  		batchSize:       Live.Conf.GetInt("batch"),
   716  		clientDir:       Live.Conf.GetString("xidmap"),
   717  		authToken:       Live.Conf.GetString("auth_token"),
   718  		useCompression:  Live.Conf.GetBool("use_compression"),
   719  		newUids:         Live.Conf.GetBool("new_uids"),
   720  		verbose:         Live.Conf.GetBool("verbose"),
   721  		httpAddr:        Live.Conf.GetString("http"),
   722  		bufferSize:      Live.Conf.GetInt("bufferSize"),
   723  		upsertPredicate: Live.Conf.GetString("upsertPredicate"),
   724  		tmpDir:          Live.Conf.GetString("tmp"),
   725  		key:             keys.EncKey,
   726  	}
   727  
   728  	forceNs := Live.Conf.GetInt64("force-namespace")
   729  	switch creds.GetUint64("namespace") {
   730  	case x.GalaxyNamespace:
   731  		if forceNs < 0 {
   732  			opt.preserveNs = true
   733  			opt.namespaceToLoad = math.MaxUint64
   734  		} else {
   735  			opt.namespaceToLoad = uint64(forceNs)
   736  		}
   737  	default:
   738  		if Live.Conf.IsSet("force-namespace") {
   739  			return errors.Errorf("cannot force namespace %#x when provided creds are not of"+
   740  				" guardian of galaxy user", forceNs)
   741  		}
   742  		opt.namespaceToLoad = creds.GetUint64("namespace")
   743  	}
   744  
   745  	z.SetTmpDir(opt.tmpDir)
   746  
   747  	go func() {
   748  		if err := http.ListenAndServe(opt.httpAddr, nil); err != nil {
   749  			glog.Errorf("Error while starting HTTP server: %+v", err)
   750  		}
   751  	}()
   752  	ctx := context.Background()
   753  	// singleNsOp is set to false when loading data into a namespace different from the one the
   754  	// user provided credentials for.
   755  	singleNsOp := true
   756  	if len(creds.GetString("user")) > 0 && creds.GetUint64("namespace") == x.GalaxyNamespace &&
   757  		opt.namespaceToLoad != x.GalaxyNamespace {
   758  		singleNsOp = false
   759  	}
   760  	galaxyOperation := false
   761  	if !singleNsOp {
   762  		// Attach the galaxy operation to the context to specify that the queries/mutations
   763  		// using this context will be galaxy-wide.
   764  		galaxyOperation = true
   765  		ctx = x.AttachGalaxyOperation(ctx, opt.namespaceToLoad)
   766  		// We don't support an upsert predicate while loading data into multiple namespaces.
   767  		if len(opt.upsertPredicate) > 0 {
   768  			return errors.Errorf("Upsert Predicate feature is not supported for loading " +
   769  				"into multiple namespaces.")
   770  		}
   771  	}
   772  
   773  	bmOpts := batchMutationOptions{
   774  		Size:          opt.batchSize,
   775  		Pending:       opt.concurrent,
   776  		PrintCounters: true,
   777  		Ctx:           ctx,
   778  		MaxRetries:    math.MaxUint32,
   779  		bufferSize:    opt.bufferSize,
   780  	}
   781  
   782  	// Create directory for temporary buffers.
   783  	x.Check(os.MkdirAll(opt.tmpDir, 0700))
   784  
   785  	dg, closeFunc := x.GetDgraphClient(Live.Conf, true)
   786  	defer closeFunc()
   787  
   788  	l := setup(bmOpts, dg, Live.Conf)
   789  	defer l.zeroconn.Close()
   790  
   791  	if err := l.populateNamespaces(ctx, dg, singleNsOp); err != nil {
   792  		fmt.Printf("Error while populating namespaces %s\n", err)
   793  		return err
   794  	}
   795  
   796  	if !opt.preserveNs {
   797  		if _, ok := l.namespaces[opt.namespaceToLoad]; !ok {
   798  			return errors.Errorf("Cannot load into namespace %#x. It does not exist.",
   799  				opt.namespaceToLoad)
   800  		}
   801  	}
   802  
   803  	if len(opt.schemaFile) > 0 {
   804  		err := l.processSchemaFile(ctx, opt.schemaFile, opt.key, dg)
   805  		if err != nil {
   806  			if err == context.Canceled {
   807  				fmt.Printf("Interrupted while processing schema file %q\n", opt.schemaFile)
   808  				return nil
   809  			}
   810  			fmt.Printf("Error while processing schema file %q: %s\n", opt.schemaFile, err)
   811  			return err
   812  		}
   813  		fmt.Printf("Processed schema file %q\n\n", opt.schemaFile)
   814  	}
   815  
   816  	if l.schema, err = getSchema(ctx, dg, galaxyOperation); err != nil {
   817  		fmt.Printf("Error while loading schema from alpha %s\n", err)
   818  		return err
   819  	}
   820  
   821  	if opt.dataFiles == "" {
   822  		return errors.New("RDF or JSON file(s) location must be specified")
   823  	}
   824  
   825  	fs := filestore.NewFileStore(opt.dataFiles)
   826  
   827  	filesList := fs.FindDataFiles(opt.dataFiles, []string{".rdf", ".rdf.gz", ".json", ".json.gz"})
   828  	totalFiles := len(filesList)
   829  	if totalFiles == 0 {
   830  		return errors.Errorf("No data files found in %s", opt.dataFiles)
   831  	}
   832  	fmt.Printf("Found %d data file(s) to process\n", totalFiles)
   833  
   834  	errCh := make(chan error, totalFiles)
   835  	for _, file := range filesList {
   836  		file = strings.Trim(file, " \t")
   837  		go func(file string) {
   838  			errCh <- errors.Wrapf(l.processFile(ctx, fs, file, opt.key), file)
   839  		}(file)
   840  	}
   841  
   842  	// PrintCounters should be called after the schema has been updated.
   843  	if bmOpts.PrintCounters {
   844  		go l.printCounters()
   845  	}
   846  
   847  	for i := 0; i < totalFiles; i++ {
   848  		if err := <-errCh; err != nil {
   849  			fmt.Printf("Error while processing data file %s\n", err)
   850  			return err
   851  		}
   852  	}
   853  
   854  	close(l.reqs)
   855  	// First we wait for requestsWg; when it is done, we know all retry requests have been added
   856  	// to retryRequestsWg. We can't use the same waitgroup because, by the time we call Wait, we
   857  	// can't be sure that all retry requests have been added to it.
   858  	l.requestsWg.Wait()
   859  	l.retryRequestsWg.Wait()
   860  	c := l.Counter()
   861  	var rate uint64
   862  	if c.Elapsed.Seconds() < 1 {
   863  		rate = c.Nquads
   864  	} else {
   865  		rate = c.Nquads / uint64(c.Elapsed.Seconds())
   866  	}
   867  	// Let's print an empty line, otherwise Interrupted or Number of Mutations overwrites the
   868  	// previously printed line.
   869  	fmt.Printf("%100s\r", "")
   870  	fmt.Printf("Number of TXs run            : %d\n", c.TxnsDone)
   871  	fmt.Printf("Number of N-Quads processed  : %d\n", c.Nquads)
   872  	fmt.Printf("Time spent                   : %v\n", c.Elapsed)
   873  	fmt.Printf("N-Quads processed per second : %d\n", rate)
   874  
   875  	if err := l.alloc.Flush(); err != nil {
   876  		return err
   877  	}
   878  	if l.db != nil {
   879  		if err := l.db.Close(); err != nil {
   880  			return err
   881  		}
   882  	}
   883  	return nil
   884  }