github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/live/batch.go

github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/live/batch.go (about)

     1  /*
     2   * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package live
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"math/rand"
    23  	"strings"
    24  	"sync"
    25  	"sync/atomic"
    26  	"time"
    27  
    28  	"google.golang.org/grpc"
    29  	"google.golang.org/grpc/codes"
    30  	"google.golang.org/grpc/status"
    31  
    32  	"github.com/dgraph-io/badger"
    33  	"github.com/dgraph-io/dgo"
    34  	"github.com/dgraph-io/dgo/protos/api"
    35  	"github.com/dgraph-io/dgo/y"
    36  	"github.com/dgraph-io/dgraph/x"
    37  	"github.com/dgraph-io/dgraph/xidmap"
    38  	"github.com/dustin/go-humanize/english"
    39  )
    40  
    41  // batchMutationOptions sets the clients batch mode to Pending number of buffers each of Size.
    42  // Running counters of number of rdfs processed, total time and mutations per second are printed
    43  // if PrintCounters is set true.  See Counter.
    44  type batchMutationOptions struct {
    45  	Size          int
    46  	Pending       int
    47  	PrintCounters bool
    48  	MaxRetries    uint32
    49  	// User could pass a context so that we can stop retrying requests once context is done
    50  	Ctx context.Context
    51  }
    52  
    53  // loader is the data structure held by the user program for all interactions with the Dgraph
    54  // server.  After making grpc connection a new Dgraph is created by function NewDgraphClient.
    55  type loader struct {
    56  	opts batchMutationOptions
    57  
    58  	dc         *dgo.Dgraph
    59  	alloc      *xidmap.XidMap
    60  	ticker     *time.Ticker
    61  	db         *badger.DB
    62  	requestsWg sync.WaitGroup
    63  	// If we retry a request, we add one to retryRequestsWg.
    64  	retryRequestsWg sync.WaitGroup
    65  
    66  	// Miscellaneous information to print counters.
    67  	// Num of N-Quads sent
    68  	nquads uint64
    69  	// Num of txns sent
    70  	txns uint64
    71  	// Num of aborts
    72  	aborts uint64
    73  	// To get time elapsed
    74  	start time.Time
    75  
    76  	reqNum   uint64
    77  	reqs     chan api.Mutation
    78  	zeroconn *grpc.ClientConn
    79  }
    80  
    81  // Counter keeps a track of various parameters about a batch mutation. Running totals are printed
    82  // if BatchMutationOptions PrintCounters is set to true.
    83  type Counter struct {
    84  	// Number of N-Quads processed by server.
    85  	Nquads uint64
    86  	// Number of mutations processed by the server.
    87  	TxnsDone uint64
    88  	// Number of Aborts
    89  	Aborts uint64
    90  	// Time elapsed since the batch started.
    91  	Elapsed time.Duration
    92  }
    93  
    94  // handleError inspects errors and terminates if the errors are non-recoverable.
    95  // A gRPC code is Internal if there is an unforeseen issue that needs attention.
    96  // A gRPC code is Unavailable when we can't possibly reach the remote server, most likely the
    97  // server expects TLS and our certificate does not match or the host name is not verified. When
    98  // the node certificate is created the name much match the request host name. e.g., localhost not
    99  // 127.0.0.1.
   100  func handleError(err error, reqNum uint64, isRetry bool) {
   101  	s := status.Convert(err)
   102  	switch {
   103  	case s.Code() == codes.Internal, s.Code() == codes.Unavailable:
   104  		x.Fatalf(s.Message())
   105  	case strings.Contains(s.Message(), "x509"):
   106  		x.Fatalf(s.Message())
   107  	case s.Code() == codes.Aborted:
   108  		if !isRetry && opt.verbose {
   109  			fmt.Printf("Transaction #%d aborted. Will retry in background.\n", reqNum)
   110  		}
   111  	case strings.Contains(s.Message(), "Server overloaded."):
   112  		dur := time.Duration(1+rand.Intn(10)) * time.Minute
   113  		fmt.Printf("Server is overloaded. Will retry after %s.\n", dur.Round(time.Minute))
   114  		time.Sleep(dur)
   115  	case err != y.ErrConflict && err != y.ErrAborted:
   116  		fmt.Printf("Error while mutating: %v s.Code %v\n", s.Message(), s.Code())
   117  	}
   118  }
   119  
   120  func (l *loader) infinitelyRetry(req api.Mutation, reqNum uint64) {
   121  	defer l.retryRequestsWg.Done()
   122  	nretries := 1
   123  	for i := time.Millisecond; ; i *= 2 {
   124  		txn := l.dc.NewTxn()
   125  		req.CommitNow = true
   126  		_, err := txn.Mutate(l.opts.Ctx, &req)
   127  		if err == nil {
   128  			if opt.verbose {
   129  				fmt.Printf("Transaction #%d succeeded after %s.\n",
   130  					reqNum, english.Plural(nretries, "retry", "retries"))
   131  			}
   132  			atomic.AddUint64(&l.nquads, uint64(len(req.Set)))
   133  			atomic.AddUint64(&l.txns, 1)
   134  			return
   135  		}
   136  		nretries++
   137  		handleError(err, reqNum, true)
   138  		atomic.AddUint64(&l.aborts, 1)
   139  		if i >= 10*time.Second {
   140  			i = 10 * time.Second
   141  		}
   142  		time.Sleep(i)
   143  	}
   144  }
   145  
   146  func (l *loader) request(req api.Mutation, reqNum uint64) {
   147  	txn := l.dc.NewTxn()
   148  	req.CommitNow = true
   149  	_, err := txn.Mutate(l.opts.Ctx, &req)
   150  
   151  	if err == nil {
   152  		atomic.AddUint64(&l.nquads, uint64(len(req.Set)))
   153  		atomic.AddUint64(&l.txns, 1)
   154  		return
   155  	}
   156  	handleError(err, reqNum, false)
   157  	atomic.AddUint64(&l.aborts, 1)
   158  	l.retryRequestsWg.Add(1)
   159  	go l.infinitelyRetry(req, reqNum)
   160  }
   161  
   162  // makeRequests can receive requests from batchNquads or directly from BatchSetWithMark.
   163  // It doesn't need to batch the requests anymore. Batching is already done for it by the
   164  // caller functions.
   165  func (l *loader) makeRequests() {
   166  	defer l.requestsWg.Done()
   167  	for req := range l.reqs {
   168  		reqNum := atomic.AddUint64(&l.reqNum, 1)
   169  		l.request(req, reqNum)
   170  	}
   171  }
   172  
   173  func (l *loader) printCounters() {
   174  	period := 5 * time.Second
   175  	l.ticker = time.NewTicker(period)
   176  	start := time.Now()
   177  
   178  	var last Counter
   179  	for range l.ticker.C {
   180  		counter := l.Counter()
   181  		rate := float64(counter.Nquads-last.Nquads) / period.Seconds()
   182  		elapsed := time.Since(start).Round(time.Second)
   183  		timestamp := time.Now().Format("15:04:05Z0700")
   184  		fmt.Printf("[%s] Elapsed: %s Txns: %d N-Quads: %d N-Quads/s [last 5s]: %5.0f Aborts: %d\n",
   185  			timestamp, x.FixedDuration(elapsed), counter.TxnsDone, counter.Nquads, rate, counter.Aborts)
   186  		last = counter
   187  	}
   188  }
   189  
   190  // Counter returns the current state of the BatchMutation.
   191  func (l *loader) Counter() Counter {
   192  	return Counter{
   193  		Nquads:   atomic.LoadUint64(&l.nquads),
   194  		TxnsDone: atomic.LoadUint64(&l.txns),
   195  		Elapsed:  time.Since(l.start),
   196  		Aborts:   atomic.LoadUint64(&l.aborts),
   197  	}
   198  }