github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/count_index.go (about)

     1  /*
     2   * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package bulk
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/binary"
    22  	"fmt"
    23  	"sync"
    24  	"sync/atomic"
    25  
    26  	"github.com/golang/glog"
    27  
    28  	"github.com/dgraph-io/badger/v3"
    29  	"github.com/dgraph-io/dgraph/codec"
    30  	"github.com/dgraph-io/dgraph/posting"
    31  	"github.com/dgraph-io/dgraph/protos/pb"
    32  	"github.com/dgraph-io/dgraph/x"
    33  	"github.com/dgraph-io/ristretto/z"
    34  )
    35  
// countEntry is the flat byte encoding of a (uid, key) pair, i.e. of the
// logical struct
//
//	type countEntry struct {
//		uid uint64
//		key []byte
//	}
//
// Layout, as written by marshalCountEntry: bytes [0:8] hold the uid in
// big-endian order, bytes [8:12] hold the key length as a big-endian uint32,
// and the key bytes follow from offset 12.
type countEntry []byte
    42  
    43  func countEntrySize(key []byte) int {
    44  	return 8 + 4 + len(key)
    45  }
    46  func marshalCountEntry(dst, key []byte, uid uint64) {
    47  	binary.BigEndian.PutUint64(dst[0:8], uid)
    48  
    49  	binary.BigEndian.PutUint32(dst[8:12], uint32(len(key)))
    50  	n := copy(dst[12:], key)
    51  	x.AssertTrue(len(dst) == n+12)
    52  }
    53  func (ci countEntry) Uid() uint64 {
    54  	return binary.BigEndian.Uint64(ci[0:8])
    55  }
    56  func (ci countEntry) Key() []byte {
    57  	sz := binary.BigEndian.Uint32(ci[8:12])
    58  	return ci[12 : 12+sz]
    59  }
    60  func (ci countEntry) less(oe countEntry) bool {
    61  	lk, rk := ci.Key(), oe.Key()
    62  	if cmp := bytes.Compare(lk, rk); cmp != 0 {
    63  		return cmp < 0
    64  	}
    65  	return ci.Uid() < oe.Uid()
    66  }
    67  
    68  type current struct {
    69  	pred  string
    70  	rev   bool
    71  	track bool
    72  }
    73  
    74  type countIndexer struct {
    75  	*reducer
    76  	writer      *badger.StreamWriter
    77  	splitWriter *badger.WriteBatch
    78  	splitCh     chan *badger.KVList
    79  	tmpDb       *badger.DB
    80  	cur         current
    81  	countBuf    *z.Buffer
    82  	wg          sync.WaitGroup
    83  }
    84  
    85  // addUid adds the uid from rawKey to a count index if a count index is
    86  // required by the schema. This method expects keys to be passed into it in
    87  // sorted order.
    88  func (c *countIndexer) addCountEntry(ce countEntry) {
    89  	pk, err := x.Parse(ce.Key())
    90  	x.Check(err)
    91  
    92  	sameIndexKey := pk.Attr == c.cur.pred && pk.IsReverse() == c.cur.rev
    93  	if sameIndexKey && !c.cur.track {
    94  		return
    95  	}
    96  
    97  	if !sameIndexKey {
    98  		if c.countBuf.LenNoPadding() > 0 {
    99  			c.wg.Add(1)
   100  			go c.writeIndex(c.countBuf)
   101  			c.countBuf = getBuf(c.opt.TmpDir)
   102  		}
   103  		c.cur.pred = pk.Attr
   104  		c.cur.rev = pk.IsReverse()
   105  		c.cur.track = c.schema.getSchema(pk.Attr).GetCount()
   106  	}
   107  	if c.cur.track {
   108  		dst := c.countBuf.SliceAllocate(len(ce))
   109  		copy(dst, ce)
   110  	}
   111  }
   112  
   113  func (c *countIndexer) writeIndex(buf *z.Buffer) {
   114  	defer func() {
   115  		c.wg.Done()
   116  		if err := buf.Release(); err != nil {
   117  			glog.Warningf("error in releasing buffer: %v", err)
   118  		}
   119  
   120  	}()
   121  	if buf.IsEmpty() {
   122  		return
   123  	}
   124  
   125  	streamId := atomic.AddUint32(&c.streamId, 1)
   126  	buf.SortSlice(func(ls, rs []byte) bool {
   127  		left := countEntry(ls)
   128  		right := countEntry(rs)
   129  		return left.less(right)
   130  	})
   131  
   132  	tmp, _ := buf.Slice(buf.StartOffset())
   133  	lastCe := countEntry(tmp)
   134  	{
   135  		pk, err := x.Parse(lastCe.Key())
   136  		x.Check(err)
   137  		fmt.Printf("Writing count index for %q rev=%v\n", pk.Attr, pk.IsReverse())
   138  	}
   139  
   140  	alloc := z.NewAllocator(8<<20, "CountIndexer.WriteIndex")
   141  	defer alloc.Release()
   142  
   143  	var pl pb.PostingList
   144  	encoder := codec.Encoder{BlockSize: 256, Alloc: alloc}
   145  
   146  	outBuf := z.NewBuffer(5<<20, "CountIndexer.Buffer.WriteIndex")
   147  	defer func() {
   148  		if err := outBuf.Release(); err != nil {
   149  			glog.Warningf("error in releasing buffer: %v", err)
   150  		}
   151  	}()
   152  	encode := func() {
   153  		pl.Pack = encoder.Done()
   154  		if codec.ExactLen(pl.Pack) == 0 {
   155  			return
   156  		}
   157  
   158  		kv := posting.MarshalPostingList(&pl, nil)
   159  		kv.Key = append([]byte{}, lastCe.Key()...)
   160  		kv.Version = c.state.writeTs
   161  		kv.StreamId = streamId
   162  		badger.KVToBuffer(kv, outBuf)
   163  
   164  		alloc.Reset()
   165  		encoder = codec.Encoder{BlockSize: 256, Alloc: alloc}
   166  		pl.Reset()
   167  
   168  		// flush out the buffer.
   169  		if outBuf.LenNoPadding() > 4<<20 {
   170  			x.Check(c.writer.Write(outBuf))
   171  			outBuf.Reset()
   172  		}
   173  	}
   174  
   175  	if err := buf.SliceIterate(func(slice []byte) error {
   176  		ce := countEntry(slice)
   177  		if !bytes.Equal(lastCe.Key(), ce.Key()) {
   178  			encode()
   179  		}
   180  		encoder.Add(ce.Uid())
   181  		lastCe = ce
   182  		return nil
   183  	}); err != nil {
   184  		glog.Errorf("error while counting in buf: %v\n", err)
   185  		x.Check(err)
   186  	}
   187  	encode()
   188  	x.Check(c.writer.Write(outBuf))
   189  }
   190  
   191  func (c *countIndexer) wait() {
   192  	if c.countBuf.LenNoPadding() > 0 {
   193  		c.wg.Add(1)
   194  		go c.writeIndex(c.countBuf)
   195  	} else {
   196  		if err := c.countBuf.Release(); err != nil {
   197  			glog.Warningf("error in releasing buffer: %v", err)
   198  		}
   199  	}
   200  	c.wg.Wait()
   201  }