github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/live/run.go (about) 1 /* 2 * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package live 18 19 import ( 20 "bufio" 21 "compress/gzip" 22 "context" 23 "crypto/tls" 24 "encoding/json" 25 "fmt" 26 "io" 27 "math" 28 "math/rand" 29 "net/http" 30 _ "net/http/pprof" // http profiler 31 "os" 32 "sort" 33 "strconv" 34 "strings" 35 "time" 36 37 "github.com/dgryski/go-farm" 38 "github.com/golang/glog" 39 "github.com/pkg/errors" 40 "github.com/spf13/cobra" 41 "github.com/spf13/viper" 42 "google.golang.org/grpc" 43 "google.golang.org/grpc/metadata" 44 45 "github.com/dgraph-io/badger/v3" 46 bopt "github.com/dgraph-io/badger/v3/options" 47 "github.com/dgraph-io/dgo/v210" 48 "github.com/dgraph-io/dgo/v210/protos/api" 49 "github.com/dgraph-io/dgraph/chunker" 50 "github.com/dgraph-io/dgraph/ee" 51 "github.com/dgraph-io/dgraph/ee/enc" 52 "github.com/dgraph-io/dgraph/filestore" 53 schemapkg "github.com/dgraph-io/dgraph/schema" 54 "github.com/dgraph-io/dgraph/types" 55 "github.com/dgraph-io/dgraph/x" 56 "github.com/dgraph-io/dgraph/xidmap" 57 "github.com/dgraph-io/ristretto/z" 58 ) 59 60 type options struct { 61 dataFiles string 62 dataFormat string 63 schemaFile string 64 zero string 65 concurrent int 66 batchSize int 67 clientDir string 68 authToken string 69 useCompression bool 70 newUids bool 71 verbose bool 72 httpAddr string 73 bufferSize int 74 upsertPredicate string 75 tmpDir string 76 key x.Sensitive 77 namespaceToLoad uint64 78 preserveNs bool 79 } 80 81 type predicate struct { 82 Predicate string `json:"predicate,omitempty"` 83 Type string `json:"type,omitempty"` 84 Tokenizer []string `json:"tokenizer,omitempty"` 85 Count bool `json:"count,omitempty"` 86 List bool `json:"list,omitempty"` 87 Lang bool `json:"lang,omitempty"` 88 Index bool `json:"index,omitempty"` 89 Upsert bool `json:"upsert,omitempty"` 90 Reverse bool `json:"reverse,omitempty"` 91 NoConflict bool `json:"no_conflict,omitempty"` 92 ValueType types.TypeID 93 } 94 95 type schema struct { 96 Predicates []*predicate `json:"schema,omitempty"` 97 preds map[string]*predicate 98 } 99 100 type request struct { 101 *api.Mutation 102 conflicts []uint64 103 } 104 105 func (l *schema) init(ns uint64, galaxyOperation bool) { 106 l.preds = make(map[string]*predicate) 107 for _, i := range l.Predicates { 108 i.ValueType, _ = types.TypeForName(i.Type) 109 if !galaxyOperation { 110 i.Predicate = x.NamespaceAttr(ns, i.Predicate) 111 } 112 l.preds[i.Predicate] = i 113 } 114 } 115 116 var ( 117 opt options 118 sch schema 119 120 // Live is the sub-command invoked when running "dgraph live". 121 Live x.SubCommand 122 ) 123 124 func init() { 125 Live.Cmd = &cobra.Command{ 126 Use: "live", 127 Short: "Run Dgraph Live Loader", 128 Run: func(cmd *cobra.Command, args []string) { 129 defer x.StartProfile(Live.Conf).Stop() 130 if err := run(); err != nil { 131 x.Check2(fmt.Fprintf(os.Stderr, "%s", err.Error())) 132 os.Exit(1) 133 } 134 }, 135 Annotations: map[string]string{"group": "data-load"}, 136 } 137 Live.EnvPrefix = "DGRAPH_LIVE" 138 Live.Cmd.SetHelpTemplate(x.NonRootTemplate) 139 140 flag := Live.Cmd.Flags() 141 // --vault SuperFlag and encryption flags 142 ee.RegisterEncFlag(flag) 143 // --tls SuperFlag 144 x.RegisterClientTLSFlags(flag) 145 146 flag.StringP("files", "f", "", "Location of *.rdf(.gz) or *.json(.gz) file(s) to load") 147 flag.StringP("schema", "s", "", "Location of schema file") 148 flag.String("format", "", "Specify file format (rdf or json) instead of getting it "+ 149 "from filename") 150 flag.StringP("alpha", "a", "127.0.0.1:9080", 151 "Comma-separated list of Dgraph alpha gRPC server addresses") 152 flag.StringP("zero", "z", "127.0.0.1:5080", "Dgraph zero gRPC server address") 153 flag.IntP("conc", "c", 10, 154 "Number of concurrent requests to make to Dgraph") 155 flag.IntP("batch", "b", 1000, 156 "Number of N-Quads to send as part of a mutation.") 157 flag.StringP("xidmap", "x", "", "Directory to store xid to uid mapping") 158 flag.StringP("auth_token", "t", "", 159 "The auth token passed to the server for Alter operation of the schema file. "+ 160 "If used with --slash_grpc_endpoint, then this should be set to the API token issued"+ 161 "by Slash GraphQL") 162 flag.String("slash_grpc_endpoint", "", "Path to Slash GraphQL GRPC endpoint. "+ 163 "If --slash_grpc_endpoint is set, all other TLS options and connection options will be"+ 164 "ignored") 165 flag.BoolP("use_compression", "C", false, 166 "Enable compression on connection to alpha server") 167 flag.Bool("new_uids", false, 168 "Ignore UIDs in load files and assign new ones.") 169 flag.String("http", "localhost:6060", "Address to serve http (pprof).") 170 flag.Bool("verbose", false, "Run the live loader in verbose mode") 171 172 flag.String("creds", "", 173 `Various login credentials if login is required. 174 user defines the username to login. 175 password defines the password of the user. 176 namespace defines the namespace to log into. 177 Sample flag could look like --creds user=username;password=mypass;namespace=2`) 178 179 flag.StringP("bufferSize", "m", "100", "Buffer for each thread") 180 flag.StringP("upsertPredicate", "U", "", "run in upsertPredicate mode. the value would "+ 181 "be used to store blank nodes as an xid") 182 flag.String("tmp", "t", "Directory to store temporary buffers.") 183 flag.Int64("force-namespace", 0, "Namespace onto which to load the data."+ 184 "Only guardian of galaxy should use this for loading data into multiple namespaces or some"+ 185 "specific namespace. Setting it to negative value will preserve the namespace.") 186 } 187 188 func getSchema(ctx context.Context, dgraphClient *dgo.Dgraph, galaxyOperation bool) (*schema, error) { 189 txn := dgraphClient.NewTxn() 190 defer func() { 191 if err := txn.Discard(ctx); err != nil { 192 glog.Warningf("error in discarding txn: %v", err) 193 } 194 }() 195 196 res, err := txn.Query(ctx, "schema {}") 197 if err != nil { 198 return nil, err 199 } 200 201 err = json.Unmarshal(res.GetJson(), &sch) 202 if err != nil { 203 return nil, err 204 } 205 // If we are not loading data across namespaces, the schema query result will not contain the 206 // namespace information. Set it inside the init function. 207 sch.init(opt.namespaceToLoad, galaxyOperation) 208 return &sch, nil 209 } 210 211 // validate that the schema contains the predicates whose namespace exist. 212 func validateSchema(sch string, namespaces map[uint64]struct{}) error { 213 result, err := schemapkg.Parse(sch) 214 if err != nil { 215 return err 216 } 217 for _, pred := range result.Preds { 218 ns := x.ParseNamespace(pred.Predicate) 219 if _, ok := namespaces[ns]; !ok { 220 return errors.Errorf("Namespace %#x doesn't exist for pred %s.", ns, pred.Predicate) 221 } 222 } 223 for _, typ := range result.Types { 224 ns := x.ParseNamespace(typ.TypeName) 225 if _, ok := namespaces[ns]; !ok { 226 return errors.Errorf("Namespace %#x doesn't exist for type %s.", ns, typ.TypeName) 227 } 228 } 229 return nil 230 } 231 232 // processSchemaFile process schema for a given gz file. 233 func (l *loader) processSchemaFile(ctx context.Context, file string, key x.Sensitive, 234 dgraphClient *dgo.Dgraph) error { 235 fmt.Printf("\nProcessing schema file %q\n", file) 236 if len(opt.authToken) > 0 { 237 md := metadata.New(nil) 238 md.Append("auth-token", opt.authToken) 239 ctx = metadata.NewOutgoingContext(ctx, md) 240 } 241 242 f, err := filestore.Open(file) 243 x.CheckfNoTrace(err) 244 defer f.Close() 245 246 reader, err := enc.GetReader(key, f) 247 x.Check(err) 248 if strings.HasSuffix(strings.ToLower(file), ".gz") { 249 reader, err = gzip.NewReader(reader) 250 x.Check(err) 251 } 252 253 b, err := io.ReadAll(reader) 254 if err != nil { 255 x.Checkf(err, "Error while reading file") 256 } 257 258 op := &api.Operation{} 259 op.Schema = string(b) 260 if opt.preserveNs { 261 // Verify schema if we are loding into multiple namespaces. 262 if err := validateSchema(op.Schema, l.namespaces); err != nil { 263 return err 264 } 265 } 266 return dgraphClient.Alter(ctx, op) 267 } 268 269 func (l *loader) uid(val string, ns uint64) string { 270 // Attempt to parse as a UID (in the same format that dgraph outputs - a 271 // hex number prefixed by "0x"). If parsing succeeds, then this is assumed 272 // to be an existing node in the graph. There is limited protection against 273 // a user selecting an unassigned UID in this way - it may be assigned 274 // later to another node. It is up to the user to avoid this. 275 if !opt.newUids { 276 if uid, err := strconv.ParseUint(val, 0, 64); err == nil { 277 return fmt.Sprintf("%#x", uid) 278 } 279 } 280 281 // TODO(Naman): Do we still need this here? As xidmap which uses btree does not keep hold of 282 // this string. 283 sb := strings.Builder{} 284 x.Check2(sb.WriteString(x.NamespaceAttr(ns, val))) 285 uid, _ := l.alloc.AssignUid(sb.String()) 286 287 return fmt.Sprintf("%#x", uint64(uid)) 288 } 289 290 func generateBlankNode(val string) string { 291 // generates "u_hash(val)" 292 293 sb := strings.Builder{} 294 x.Check2(sb.WriteString("u_")) 295 x.Check2(sb.WriteString(strconv.FormatUint(farm.Fingerprint64([]byte(val)), 10))) 296 return sb.String() 297 } 298 299 func generateUidFunc(val string) string { 300 // generates "uid(val)" 301 302 sb := strings.Builder{} 303 sb.WriteString("uid(") 304 sb.WriteString(val) 305 sb.WriteRune(')') 306 return sb.String() 307 } 308 309 func generateQuery(node, predicate, xid string) string { 310 // generates "node as node(func: eq(predicate, xid)) {uid}" 311 312 sb := strings.Builder{} 313 sb.WriteString(node) 314 sb.WriteString(" as ") 315 sb.WriteString(node) 316 sb.WriteString("(func: eq(") 317 sb.WriteString(predicate) 318 sb.WriteString(`, `) 319 sb.WriteString(strconv.Quote(xid)) 320 sb.WriteString(`)) {uid}`) 321 return sb.String() 322 } 323 324 func (l *loader) upsertUids(nqs []*api.NQuad) { 325 // We form upsertPredicate query for each of the ids we saw in the request, along with 326 // adding the corresponding xid to that uid. The mutation we added is only useful if the 327 // uid doesn't exists. 328 // 329 // Example upsertPredicate mutation: 330 // 331 // query { 332 // u_1 as var(func: eq(xid, "m.1234")) 333 // } 334 // 335 // mutation { 336 // set { 337 // uid(u_1) xid m.1234 . 338 // } 339 // } 340 l.upsertLock.Lock() 341 defer l.upsertLock.Unlock() 342 343 ids := make(map[string]string) 344 345 for _, nq := range nqs { 346 // taking hash as the value might contain invalid symbols 347 subject := x.NamespaceAttr(nq.Namespace, nq.Subject) 348 ids[subject] = generateBlankNode(subject) 349 350 if len(nq.ObjectId) > 0 { 351 // taking hash as the value might contain invalid symbols 352 object := x.NamespaceAttr(nq.Namespace, nq.ObjectId) 353 ids[object] = generateBlankNode(object) 354 } 355 } 356 357 mutations := make([]*api.NQuad, 0, len(ids)) 358 query := strings.Builder{} 359 query.WriteString("query {") 360 query.WriteRune('\n') 361 362 for xid, idx := range ids { 363 if l.alloc.CheckUid(xid) { 364 continue 365 } 366 367 // Strip away the namespace from the query and mutation. 368 xid := x.ParseAttr(xid) 369 query.WriteString(generateQuery(idx, opt.upsertPredicate, xid)) 370 query.WriteRune('\n') 371 mutations = append(mutations, &api.NQuad{ 372 Subject: generateUidFunc(idx), 373 Predicate: opt.upsertPredicate, 374 ObjectValue: &api.Value{Val: &api.Value_StrVal{StrVal: xid}}, 375 }) 376 } 377 378 if len(mutations) == 0 { 379 return 380 } 381 382 query.WriteRune('}') 383 384 // allocate all the new xids 385 resp, err := l.dc.NewTxn().Do(l.opts.Ctx, &api.Request{ 386 CommitNow: true, 387 Query: query.String(), 388 Mutations: []*api.Mutation{{Set: mutations}}, 389 }) 390 391 if err != nil { 392 panic(err) 393 } 394 395 type dResult struct { 396 Uid string 397 } 398 399 var result map[string][]dResult 400 err = json.Unmarshal(resp.GetJson(), &result) 401 if err != nil { 402 panic(err) 403 } 404 405 for xid, idx := range ids { 406 // xid already exist in dgraph 407 if val, ok := result[idx]; ok && len(val) > 0 { 408 uid, err := strconv.ParseUint(val[0].Uid, 0, 64) 409 if err != nil { 410 panic(err) 411 } 412 413 l.alloc.SetUid(xid, uid) 414 continue 415 } 416 417 // new uid created in draph 418 if val, ok := resp.GetUids()[generateUidFunc(idx)]; ok { 419 uid, err := strconv.ParseUint(val, 0, 64) 420 if err != nil { 421 panic(err) 422 } 423 424 l.alloc.SetUid(xid, uid) 425 continue 426 } 427 } 428 } 429 430 // allocateUids looks for the maximum uid value in the given NQuads and bumps the 431 // maximum seen uid to that value. 432 func (l *loader) allocateUids(nqs []*api.NQuad) { 433 if opt.newUids { 434 return 435 } 436 437 var maxUid uint64 438 for _, nq := range nqs { 439 sUid, err := strconv.ParseUint(nq.Subject, 0, 64) 440 if err != nil { 441 continue 442 } 443 if sUid > maxUid { 444 maxUid = sUid 445 } 446 447 oUid, err := strconv.ParseUint(nq.ObjectId, 0, 64) 448 if err != nil { 449 continue 450 } 451 if oUid > maxUid { 452 maxUid = oUid 453 } 454 } 455 l.alloc.BumpTo(maxUid) 456 } 457 458 // processFile forwards a file to the RDF or JSON processor as appropriate 459 func (l *loader) processFile(ctx context.Context, fs filestore.FileStore, filename string, 460 key x.Sensitive) error { 461 462 fmt.Printf("Processing data file %q\n", filename) 463 464 rd, cleanup := fs.ChunkReader(filename, key) 465 defer cleanup() 466 467 loadType := chunker.DataFormat(filename, opt.dataFormat) 468 if loadType == chunker.UnknownFormat { 469 if isJson, err := chunker.IsJSONData(rd); err == nil { 470 if isJson { 471 loadType = chunker.JsonFormat 472 } else { 473 return errors.Errorf("need --format=rdf or --format=json to load %s", filename) 474 } 475 } 476 } 477 478 return l.processLoadFile(ctx, rd, chunker.NewChunker(loadType, opt.batchSize)) 479 } 480 481 func (l *loader) processLoadFile(ctx context.Context, rd *bufio.Reader, ck chunker.Chunker) error { 482 nqbuf := ck.NQuads() 483 errCh := make(chan error, 1) 484 // Spin a goroutine to push NQuads to mutation channel. 485 go func() { 486 var err error 487 defer func() { 488 errCh <- err 489 }() 490 buffer := make([]*api.NQuad, 0, opt.bufferSize*opt.batchSize) 491 492 drain := func() { 493 // We collect opt.bufferSize requests and preprocess them. For the requests 494 // to not confict between themself, we sort them on the basis of their predicates. 495 // Predicates with count index will conflict among themselves, so we keep them at 496 // end, making room for other predicates to load quickly. 497 sort.Slice(buffer, func(i, j int) bool { 498 iPred := sch.preds[x.NamespaceAttr(buffer[i].Namespace, buffer[i].Predicate)] 499 jPred := sch.preds[x.NamespaceAttr(buffer[j].Namespace, buffer[j].Predicate)] 500 t := func(a *predicate) int { 501 if a != nil && a.Count { 502 return 1 503 } 504 return 0 505 } 506 507 // Sorts the nquads on basis of their predicates, while keeping the 508 // predicates with count index later than those without it. 509 if t(iPred) != t(jPred) { 510 return t(iPred) < t(jPred) 511 } 512 return buffer[i].Predicate < buffer[j].Predicate 513 }) 514 for len(buffer) > 0 { 515 sz := opt.batchSize 516 if len(buffer) < opt.batchSize { 517 sz = len(buffer) 518 } 519 mu := &request{Mutation: &api.Mutation{Set: buffer[:sz]}} 520 l.reqs <- mu 521 buffer = buffer[sz:] 522 } 523 } 524 525 for nqs := range nqbuf.Ch() { 526 if len(nqs) == 0 { 527 continue 528 } 529 530 for _, nq := range nqs { 531 if !opt.preserveNs { 532 // If do not preserve namespace, use the namespace passed through 533 // `--force-namespace` flag. 534 nq.Namespace = opt.namespaceToLoad 535 } 536 if _, ok := l.namespaces[nq.Namespace]; !ok { 537 err = errors.Errorf("Cannot load nquad:%+v as its namespace doesn't exist.", nq) 538 return 539 } 540 } 541 542 if opt.upsertPredicate == "" { 543 l.allocateUids(nqs) 544 } else { 545 // TODO(Naman): Handle this. Upserts UIDs send a single upsert block for multiple 546 // nquads. These nquads may belong to different namespaces. Hence, alpha can't 547 // figure out its processsing. 548 // Currently, this option works with data loading in the logged-in namespace. 549 // TODO(Naman): Add a test for a case when it works and when it doesn't. 550 l.upsertUids(nqs) 551 } 552 553 for _, nq := range nqs { 554 nq.Subject = l.uid(nq.Subject, nq.Namespace) 555 if len(nq.ObjectId) > 0 { 556 nq.ObjectId = l.uid(nq.ObjectId, nq.Namespace) 557 } 558 } 559 560 buffer = append(buffer, nqs...) 561 if len(buffer) < opt.bufferSize*opt.batchSize { 562 continue 563 } 564 565 drain() 566 } 567 drain() 568 }() 569 570 for { 571 select { 572 case <-ctx.Done(): 573 return ctx.Err() 574 case err := <-errCh: 575 return err 576 default: 577 } 578 579 chunkBuf, err := ck.Chunk(rd) 580 // Parses the rdf entries from the chunk, groups them into batches (each one 581 // containing opt.batchSize entries) and sends the batches to the loader.reqs channel (see 582 // above). 583 if oerr := ck.Parse(chunkBuf); oerr != nil { 584 return errors.Wrap(oerr, "During parsing chunk in processLoadFile") 585 } 586 if err == io.EOF { 587 break 588 } else { 589 x.Check(err) 590 } 591 } 592 nqbuf.Flush() 593 return <-errCh 594 } 595 596 func setup(opts batchMutationOptions, dc *dgo.Dgraph, conf *viper.Viper) *loader { 597 var db *badger.DB 598 if len(opt.clientDir) > 0 { 599 x.Check(os.MkdirAll(opt.clientDir, 0700)) 600 601 var err error 602 db, err = badger.Open(badger.DefaultOptions(opt.clientDir). 603 WithCompression(bopt.ZSTD). 604 WithSyncWrites(false). 605 WithBlockCacheSize(100 * (1 << 20)). 606 WithIndexCacheSize(100 * (1 << 20)). 607 WithZSTDCompressionLevel(3)) 608 x.Checkf(err, "Error while creating badger KV posting store") 609 610 } 611 612 dialOpts := []grpc.DialOption{} 613 if conf.GetString("slash_grpc_endpoint") != "" && conf.IsSet("auth_token") { 614 dialOpts = append(dialOpts, x.WithAuthorizationCredentials(conf.GetString("auth_token"))) 615 } 616 617 var tlsConfig *tls.Config 618 if conf.GetString("slash_grpc_endpoint") != "" { 619 var tlsErr error 620 tlsConfig, tlsErr = x.SlashTLSConfig(conf.GetString("slash_grpc_endpoint")) 621 x.Checkf(tlsErr, "Unable to generate TLS Cert Pool") 622 } else { 623 var tlsErr error 624 tlsConfig, tlsErr = x.LoadClientTLSConfigForInternalPort(conf) 625 x.Check(tlsErr) 626 } 627 628 // compression with zero server actually makes things worse 629 connzero, err := x.SetupConnection(opt.zero, tlsConfig, false, dialOpts...) 630 x.Checkf(err, "Unable to connect to zero, Is it running at %s?", opt.zero) 631 632 xopts := xidmap.XidMapOptions{UidAssigner: connzero, DB: db} 633 // Slash uses alpha to assign UIDs in live loader. Dgraph client is needed by xidmap to do 634 // authorization. 635 xopts.DgClient = dc 636 637 alloc := xidmap.New(xopts) 638 l := &loader{ 639 opts: opts, 640 dc: dc, 641 start: time.Now(), 642 reqs: make(chan *request, opts.Pending*2), 643 conflicts: make(map[uint64]struct{}), 644 alloc: alloc, 645 db: db, 646 zeroconn: connzero, 647 namespaces: make(map[uint64]struct{}), 648 } 649 650 l.requestsWg.Add(opts.Pending) 651 for i := 0; i < opts.Pending; i++ { 652 go l.makeRequests() 653 } 654 655 rand.Seed(time.Now().Unix()) 656 return l 657 } 658 659 // populateNamespace fetches the schema and extracts the information about the existing namespaces. 660 func (l *loader) populateNamespaces(ctx context.Context, dc *dgo.Dgraph, singleNsOp bool) error { 661 if singleNsOp { 662 // The below schema query returns the predicates without the namespace if context does not 663 // have the galaxy operation set. As we are not loading data across namespaces, so existence 664 // of namespace is verified when the user logs in. 665 l.namespaces[opt.namespaceToLoad] = struct{}{} 666 return nil 667 } 668 669 txn := dc.NewTxn() 670 defer func() { 671 if err := txn.Discard(ctx); err != nil { 672 glog.Warningf("error in discarding txn: %v", err) 673 } 674 }() 675 676 res, err := txn.Query(ctx, "schema {}") 677 if err != nil { 678 return err 679 } 680 681 var sch schema 682 err = json.Unmarshal(res.GetJson(), &sch) 683 if err != nil { 684 return err 685 } 686 687 for _, pred := range sch.Predicates { 688 ns := x.ParseNamespace(pred.Predicate) 689 l.namespaces[ns] = struct{}{} 690 } 691 return nil 692 } 693 694 func run() error { 695 var zero string 696 if Live.Conf.GetString("slash_grpc_endpoint") != "" { 697 zero = Live.Conf.GetString("slash_grpc_endpoint") 698 } else { 699 zero = Live.Conf.GetString("zero") 700 } 701 702 creds := z.NewSuperFlag(Live.Conf.GetString("creds")).MergeAndCheckDefault(x.DefaultCreds) 703 keys, err := ee.GetKeys(Live.Conf) 704 if err != nil { 705 return err 706 } 707 708 x.PrintVersion() 709 opt = options{ 710 dataFiles: Live.Conf.GetString("files"), 711 dataFormat: Live.Conf.GetString("format"), 712 schemaFile: Live.Conf.GetString("schema"), 713 zero: zero, 714 concurrent: Live.Conf.GetInt("conc"), 715 batchSize: Live.Conf.GetInt("batch"), 716 clientDir: Live.Conf.GetString("xidmap"), 717 authToken: Live.Conf.GetString("auth_token"), 718 useCompression: Live.Conf.GetBool("use_compression"), 719 newUids: Live.Conf.GetBool("new_uids"), 720 verbose: Live.Conf.GetBool("verbose"), 721 httpAddr: Live.Conf.GetString("http"), 722 bufferSize: Live.Conf.GetInt("bufferSize"), 723 upsertPredicate: Live.Conf.GetString("upsertPredicate"), 724 tmpDir: Live.Conf.GetString("tmp"), 725 key: keys.EncKey, 726 } 727 728 forceNs := Live.Conf.GetInt64("force-namespace") 729 switch creds.GetUint64("namespace") { 730 case x.GalaxyNamespace: 731 if forceNs < 0 { 732 opt.preserveNs = true 733 opt.namespaceToLoad = math.MaxUint64 734 } else { 735 opt.namespaceToLoad = uint64(forceNs) 736 } 737 default: 738 if Live.Conf.IsSet("force-namespace") { 739 return errors.Errorf("cannot force namespace %#x when provided creds are not of"+ 740 " guardian of galaxy user", forceNs) 741 } 742 opt.namespaceToLoad = creds.GetUint64("namespace") 743 } 744 745 z.SetTmpDir(opt.tmpDir) 746 747 go func() { 748 if err := http.ListenAndServe(opt.httpAddr, nil); err != nil { 749 glog.Errorf("Error while starting HTTP server: %+v", err) 750 } 751 }() 752 ctx := context.Background() 753 // singleNsOp is set to false, when loading data into a namespace different from the one user 754 // provided credentials for. 755 singleNsOp := true 756 if len(creds.GetString("user")) > 0 && creds.GetUint64("namespace") == x.GalaxyNamespace && 757 opt.namespaceToLoad != x.GalaxyNamespace { 758 singleNsOp = false 759 } 760 galaxyOperation := false 761 if !singleNsOp { 762 // Attach the galaxy to the context to specify that the query/mutations with this context 763 // will be galaxy-wide. 764 galaxyOperation = true 765 ctx = x.AttachGalaxyOperation(ctx, opt.namespaceToLoad) 766 // We don't support upsert predicate while loading data in multiple namespace. 767 if len(opt.upsertPredicate) > 0 { 768 return errors.Errorf("Upsert Predicate feature is not supported for loading" + 769 "into multiple namespaces.") 770 } 771 } 772 773 bmOpts := batchMutationOptions{ 774 Size: opt.batchSize, 775 Pending: opt.concurrent, 776 PrintCounters: true, 777 Ctx: ctx, 778 MaxRetries: math.MaxUint32, 779 bufferSize: opt.bufferSize, 780 } 781 782 // Create directory for temporary buffers. 783 x.Check(os.MkdirAll(opt.tmpDir, 0700)) 784 785 dg, closeFunc := x.GetDgraphClient(Live.Conf, true) 786 defer closeFunc() 787 788 l := setup(bmOpts, dg, Live.Conf) 789 defer l.zeroconn.Close() 790 791 if err := l.populateNamespaces(ctx, dg, singleNsOp); err != nil { 792 fmt.Printf("Error while populating namespaces %s\n", err) 793 return err 794 } 795 796 if !opt.preserveNs { 797 if _, ok := l.namespaces[opt.namespaceToLoad]; !ok { 798 return errors.Errorf("Cannot load into namespace %#x. It does not exist.", 799 opt.namespaceToLoad) 800 } 801 } 802 803 if len(opt.schemaFile) > 0 { 804 err := l.processSchemaFile(ctx, opt.schemaFile, opt.key, dg) 805 if err != nil { 806 if err == context.Canceled { 807 fmt.Printf("Interrupted while processing schema file %q\n", opt.schemaFile) 808 return nil 809 } 810 fmt.Printf("Error while processing schema file %q: %s\n", opt.schemaFile, err) 811 return err 812 } 813 fmt.Printf("Processed schema file %q\n\n", opt.schemaFile) 814 } 815 816 if l.schema, err = getSchema(ctx, dg, galaxyOperation); err != nil { 817 fmt.Printf("Error while loading schema from alpha %s\n", err) 818 return err 819 } 820 821 if opt.dataFiles == "" { 822 return errors.New("RDF or JSON file(s) location must be specified") 823 } 824 825 fs := filestore.NewFileStore(opt.dataFiles) 826 827 filesList := fs.FindDataFiles(opt.dataFiles, []string{".rdf", ".rdf.gz", ".json", ".json.gz"}) 828 totalFiles := len(filesList) 829 if totalFiles == 0 { 830 return errors.Errorf("No data files found in %s", opt.dataFiles) 831 } 832 fmt.Printf("Found %d data file(s) to process\n", totalFiles) 833 834 errCh := make(chan error, totalFiles) 835 for _, file := range filesList { 836 file = strings.Trim(file, " \t") 837 go func(file string) { 838 errCh <- errors.Wrapf(l.processFile(ctx, fs, file, opt.key), file) 839 }(file) 840 } 841 842 // PrintCounters should be called after schema has been updated. 843 if bmOpts.PrintCounters { 844 go l.printCounters() 845 } 846 847 for i := 0; i < totalFiles; i++ { 848 if err := <-errCh; err != nil { 849 fmt.Printf("Error while processing data file %s\n", err) 850 return err 851 } 852 } 853 854 close(l.reqs) 855 // First we wait for requestsWg, when it is done we know all retry requests have been added 856 // to retryRequestsWg. We can't have the same waitgroup as by the time we call Wait, we can't 857 // be sure that all retry requests have been added to the waitgroup. 858 l.requestsWg.Wait() 859 l.retryRequestsWg.Wait() 860 c := l.Counter() 861 var rate uint64 862 if c.Elapsed.Seconds() < 1 { 863 rate = c.Nquads 864 } else { 865 rate = c.Nquads / uint64(c.Elapsed.Seconds()) 866 } 867 // Lets print an empty line, otherwise Interrupted or Number of Mutations overwrites the 868 // previous printed line. 869 fmt.Printf("%100s\r", "") 870 fmt.Printf("Number of TXs run : %d\n", c.TxnsDone) 871 fmt.Printf("Number of N-Quads processed : %d\n", c.Nquads) 872 fmt.Printf("Time spent : %v\n", c.Elapsed) 873 fmt.Printf("N-Quads processed per second : %d\n", rate) 874 875 if err := l.alloc.Flush(); err != nil { 876 return err 877 } 878 if l.db != nil { 879 if err := l.db.Close(); err != nil { 880 return err 881 } 882 } 883 return nil 884 }