github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/reduce.go

     1  /*
     2   * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package bulk
    18  
    19  import (
    20  	"bufio"
    21  	"bytes"
    22  	"context"
    23  	"encoding/binary"
    24  	"fmt"
    25  	"io"
    26  	"log"
    27  	"math"
    28  	"os"
    29  	"path/filepath"
    30  	"runtime"
    31  	"sort"
    32  	"sync"
    33  	"sync/atomic"
    34  	"time"
    35  
    36  	"github.com/dustin/go-humanize"
    37  	"github.com/golang/glog"
    38  	"github.com/golang/snappy"
    39  
    40  	"github.com/dgraph-io/badger/v3"
    41  	bo "github.com/dgraph-io/badger/v3/options"
    42  	bpb "github.com/dgraph-io/badger/v3/pb"
    43  	"github.com/dgraph-io/badger/v3/y"
    44  	"github.com/dgraph-io/dgraph/codec"
    45  	"github.com/dgraph-io/dgraph/posting"
    46  	"github.com/dgraph-io/dgraph/protos/pb"
    47  	"github.com/dgraph-io/dgraph/x"
    48  	"github.com/dgraph-io/ristretto/z"
    49  )
    50  
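         // reducer merges the sorted map-phase output into badger posting lists,
         // processing one reduce shard at a time and assigning stream IDs per predicate.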
    51  type reducer struct {
    52  	*state
    53  	streamId  uint32
    54  	mu        sync.RWMutex
    55  	streamIds map[string]uint32
    56  }
    57  
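         // run starts one goroutine per reduce shard (at most opt.NumReducers at a time).
         // Each goroutine merges its shard's map files, stream-writes posting lists into
         // that shard's badger DB, and finally copies split lists back from the temporary DB.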
    58  func (r *reducer) run() error {
    59  	dirs := readShardDirs(filepath.Join(r.opt.TmpDir, reduceShardDir))
    60  	x.AssertTrue(len(dirs) == r.opt.ReduceShards)
    61  	x.AssertTrue(len(r.opt.shardOutputDirs) == r.opt.ReduceShards)
    62  
    63  	thr := y.NewThrottle(r.opt.NumReducers)
    64  	for i := 0; i < r.opt.ReduceShards; i++ {
    65  		if err := thr.Do(); err != nil {
    66  			return err
    67  		}
    68  		go func(shardId int, db *badger.DB, tmpDb *badger.DB) {
    69  			defer thr.Done(nil)
    70  
    71  			mapFiles := filenamesInTree(dirs[shardId])
    72  			var mapItrs []*mapIterator
    73  
    74  			// Dedup the partition keys.
    75  			partitions := make(map[string]struct{})
    76  			for _, mapFile := range mapFiles {
    77  				header, itr := newMapIterator(mapFile)
    78  				for _, k := range header.PartitionKeys {
    79  					if len(k) == 0 {
    80  						continue
    81  					}
    82  					partitions[string(k)] = struct{}{}
    83  				}
    84  				mapItrs = append(mapItrs, itr)
    85  			}
    86  
    87  			writer := db.NewStreamWriter()
    88  			x.Check(writer.Prepare())
    89  			// Split lists are written to a separate DB first to avoid ordering issues.
    90  			splitWriter := tmpDb.NewManagedWriteBatch()
    91  
    92  			ci := &countIndexer{
    93  				reducer:     r,
    94  				writer:      writer,
    95  				splitWriter: splitWriter,
    96  				tmpDb:       tmpDb,
    97  				splitCh:     make(chan *bpb.KVList, 2*runtime.NumCPU()),
    98  				countBuf:    getBuf(r.opt.TmpDir),
    99  			}
   100  
   101  			partitionKeys := make([][]byte, 0, len(partitions))
   102  			for k := range partitions {
   103  				partitionKeys = append(partitionKeys, []byte(k))
   104  			}
   105  			sort.Slice(partitionKeys, func(i, j int) bool {
   106  				return bytes.Compare(partitionKeys[i], partitionKeys[j]) < 0
   107  			})
   108  
   109  			r.reduce(partitionKeys, mapItrs, ci)
   110  			ci.wait()
   111  
   112  			fmt.Println("Writing split lists back to the main DB now")
   113  			// Write split lists back to the main DB.
   114  			r.writeSplitLists(db, tmpDb, writer)
   115  
   116  			x.Check(writer.Flush())
   117  
   118  			for _, itr := range mapItrs {
   119  				if err := itr.Close(); err != nil {
    120  					fmt.Printf("Error while closing iterator: %v\n", err)
   121  				}
   122  			}
   123  		}(i, r.createBadger(i), r.createTmpBadger())
   124  	}
   125  	return thr.Finish()
   126  }
   127  
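         // createBadgerInternal opens a managed badger DB at dir with sync writes disabled,
         // using the encryption key only when encrypted output is requested and enabling
         // compression only when asked to.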
   128  func (r *reducer) createBadgerInternal(dir string, compression bool) *badger.DB {
   129  	key := r.opt.EncryptionKey
   130  	if !r.opt.EncryptedOut {
   131  		key = nil
   132  	}
   133  
   134  	opt := r.state.opt.Badger.
   135  		WithDir(dir).WithValueDir(dir).
   136  		WithSyncWrites(false).
   137  		WithEncryptionKey(key)
   138  
   139  	opt.Compression = bo.None
   140  	opt.ZSTDCompressionLevel = 0
   141  	// Overwrite badger options based on the options provided by the user.
   142  	if compression {
   143  		opt.Compression = r.state.opt.Badger.Compression
   144  		opt.ZSTDCompressionLevel = r.state.opt.Badger.ZSTDCompressionLevel
   145  	}
   146  
   147  	db, err := badger.OpenManaged(opt)
   148  	x.Check(err)
   149  
   150  	// Zero out the key from memory.
   151  	opt.EncryptionKey = nil
   152  	return db
   153  }
   154  
   155  func (r *reducer) createBadger(i int) *badger.DB {
   156  	db := r.createBadgerInternal(r.opt.shardOutputDirs[i], true)
   157  	r.dbs = append(r.dbs, db)
   158  	return db
   159  }
   160  
   161  func (r *reducer) createTmpBadger() *badger.DB {
   162  	tmpDir, err := os.MkdirTemp(r.opt.TmpDir, "split")
   163  	x.Check(err)
   164  	// Do not enable compression in temporary badger to improve performance.
   165  	db := r.createBadgerInternal(tmpDir, false)
   166  	r.tmpDbs = append(r.tmpDbs, db)
   167  	return db
   168  }
   169  
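         // mapIterator sequentially reads length-prefixed map entries from one map shard file.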
   170  type mapIterator struct {
   171  	fd     *os.File
   172  	reader *bufio.Reader
   173  	meBuf  []byte
   174  }
   175  
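         // Next copies every pending map entry whose key sorts before partitionKey into cbuf.
         // An empty partitionKey means "no upper bound": copy everything until EOF. The first
         // entry at or beyond partitionKey stays in meBuf so the next call picks it up.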
   176  func (mi *mapIterator) Next(cbuf *z.Buffer, partitionKey []byte) {
   177  	readMapEntry := func() error {
   178  		if len(mi.meBuf) > 0 {
   179  			return nil
   180  		}
   181  		r := mi.reader
   182  		sizeBuf, err := r.Peek(binary.MaxVarintLen64)
   183  		if err != nil {
   184  			return err
   185  		}
   186  		sz, n := binary.Uvarint(sizeBuf)
   187  		if n <= 0 {
   188  			log.Fatalf("Could not read uvarint: %d", n)
   189  		}
   190  		x.Check2(r.Discard(n))
   191  		if cap(mi.meBuf) < int(sz) {
   192  			mi.meBuf = make([]byte, int(sz))
   193  		}
   194  		mi.meBuf = mi.meBuf[:int(sz)]
   195  		x.Check2(io.ReadFull(r, mi.meBuf))
   196  		return nil
   197  	}
   198  	for {
   199  		if err := readMapEntry(); err == io.EOF {
   200  			break
   201  		} else {
   202  			x.Check(err)
   203  		}
   204  		key := MapEntry(mi.meBuf).Key()
   205  
   206  		if len(partitionKey) == 0 || bytes.Compare(key, partitionKey) < 0 {
   207  			b := cbuf.SliceAllocate(len(mi.meBuf))
   208  			copy(b, mi.meBuf)
   209  			mi.meBuf = mi.meBuf[:0]
    210  			// The map entry is now part of cbuf.
   211  			continue
   212  		}
    213  		// The current key is not part of this batch, so keep it in meBuf; the next call will pick it up.
   214  		return
   215  	}
   216  }
   217  
   218  func (mi *mapIterator) Close() error {
   219  	return mi.fd.Close()
   220  }
   221  
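         // newMapIterator opens a map file, reads its MapHeader (which carries the partition
         // keys), and returns the header along with an iterator positioned at the first entry.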
   222  func newMapIterator(filename string) (*pb.MapHeader, *mapIterator) {
   223  	fd, err := os.Open(filename)
   224  	x.Check(err)
   225  	r := snappy.NewReader(fd)
   226  
   227  	// Read the header size.
   228  	reader := bufio.NewReaderSize(r, 16<<10)
   229  	headerLenBuf := make([]byte, 4)
   230  	x.Check2(io.ReadFull(reader, headerLenBuf))
   231  	headerLen := binary.BigEndian.Uint32(headerLenBuf)
    232  	// Read the map header.
   233  	headerBuf := make([]byte, headerLen)
   234  
   235  	x.Check2(io.ReadFull(reader, headerBuf))
   236  	header := &pb.MapHeader{}
   237  	err = header.Unmarshal(headerBuf)
   238  	x.Check(err)
   239  
   240  	itr := &mapIterator{
   241  		fd:     fd,
   242  		reader: reader,
   243  	}
   244  	return header, itr
   245  }
   246  
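         // encodeRequest carries one buffer of map entries through the encode/write pipeline:
         // listCh hands encoded posting-list KVs to the writer, splitCh hands split lists to
         // the temporary DB writer, and countBuf accumulates count-index entries.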
   247  type encodeRequest struct {
   248  	cbuf     *z.Buffer
   249  	countBuf *z.Buffer
   250  	wg       *sync.WaitGroup
   251  	listCh   chan *z.Buffer
   252  	splitCh  chan *bpb.KVList
   253  }
   254  
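         // streamIdFor returns a stable stream ID for the given predicate, allocating a new
         // one the first time the predicate is seen.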
   255  func (r *reducer) streamIdFor(pred string) uint32 {
   256  	r.mu.RLock()
   257  	if id, ok := r.streamIds[pred]; ok {
   258  		r.mu.RUnlock()
   259  		return id
   260  	}
   261  	r.mu.RUnlock()
   262  	r.mu.Lock()
   263  	defer r.mu.Unlock()
   264  	if id, ok := r.streamIds[pred]; ok {
   265  		return id
   266  	}
   267  	streamId := atomic.AddUint32(&r.streamId, 1)
   268  	r.streamIds[pred] = streamId
   269  	return streamId
   270  }
   271  
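         // encode drains entryCh, converting each request's map entries into posting-list KVs via toList.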
   272  func (r *reducer) encode(entryCh chan *encodeRequest, closer *z.Closer) {
   273  	defer closer.Done()
   274  
   275  	for req := range entryCh {
   276  		r.toList(req)
   277  		req.wg.Done()
   278  	}
   279  }
   280  
   281  const maxSplitBatchLen = 1000
   282  
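         // writeTmpSplits drains ci.splitCh and writes split posting lists into the temporary
         // badger DB in batches of at most maxSplitBatchLen KVs per write batch.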
   283  func (r *reducer) writeTmpSplits(ci *countIndexer, wg *sync.WaitGroup) {
   284  	defer wg.Done()
   285  	splitBatchLen := 0
   286  
   287  	for kvs := range ci.splitCh {
   288  		if kvs == nil || len(kvs.Kv) == 0 {
   289  			continue
   290  		}
   291  
   292  		for i := 0; i < len(kvs.Kv); i += maxSplitBatchLen {
   293  			// flush the write batch when the max batch length is reached to prevent the
   294  			// value log from growing over the allowed limit.
   295  			if splitBatchLen >= maxSplitBatchLen {
   296  				x.Check(ci.splitWriter.Flush())
   297  				ci.splitWriter = ci.tmpDb.NewManagedWriteBatch()
   298  				splitBatchLen = 0
   299  			}
   300  
   301  			batch := &bpb.KVList{}
   302  			if i+maxSplitBatchLen >= len(kvs.Kv) {
   303  				batch.Kv = kvs.Kv[i:]
   304  			} else {
   305  				batch.Kv = kvs.Kv[i : i+maxSplitBatchLen]
   306  			}
   307  			splitBatchLen += len(batch.Kv)
   308  			x.Check(ci.splitWriter.WriteList(batch))
   309  		}
   310  	}
   311  	x.Check(ci.splitWriter.Flush())
   312  }
   313  
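         // startWriting consumes encodeRequests in order: it streams each request's encoded KV
         // buffers to the main DB, emitting StreamDone markers whenever the stream ID changes,
         // and then folds the request's count entries into the count indexer. Split lists are
         // written concurrently to the temporary DB by writeTmpSplits.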
   314  func (r *reducer) startWriting(ci *countIndexer, writerCh chan *encodeRequest, closer *z.Closer) {
   315  	defer closer.Done()
   316  
   317  	// Concurrently write split lists to a temporary badger.
   318  	tmpWg := new(sync.WaitGroup)
   319  	tmpWg.Add(1)
   320  	go r.writeTmpSplits(ci, tmpWg)
   321  
   322  	count := func(req *encodeRequest) {
   323  		defer func() {
   324  			if err := req.countBuf.Release(); err != nil {
   325  				glog.Warningf("error in releasing buffer: %v", err)
   326  			}
   327  		}()
   328  		if req.countBuf.IsEmpty() {
   329  			return
   330  		}
   331  
   332  		// req.countBuf is already sorted.
   333  		sz := req.countBuf.LenNoPadding()
   334  		ci.countBuf.Grow(sz)
   335  
   336  		if err := req.countBuf.SliceIterate(func(slice []byte) error {
   337  			ce := countEntry(slice)
   338  			ci.addCountEntry(ce)
   339  			return nil
   340  		}); err != nil {
   341  			glog.Errorf("error while iterating over buf: %v", err)
   342  			x.Check(err)
   343  		}
   344  	}
   345  
   346  	var lastStreamId uint32
   347  	write := func(req *encodeRequest) {
   348  		for kvBuf := range req.listCh {
   349  			x.Check(ci.writer.Write(kvBuf))
   350  
   351  			kv := &bpb.KV{}
   352  			err := kvBuf.SliceIterate(func(s []byte) error {
   353  				kv.Reset()
   354  				x.Check(kv.Unmarshal(s))
   355  				if lastStreamId == kv.StreamId {
   356  					return nil
   357  				}
   358  				if lastStreamId > 0 {
   359  					fmt.Printf("Finishing stream id: %d\n", lastStreamId)
   360  					doneKV := &bpb.KV{
   361  						StreamId:   lastStreamId,
   362  						StreamDone: true,
   363  					}
   364  
   365  					buf := z.NewBuffer(512, "Reducer.Write")
   366  					defer func() {
   367  						if err := buf.Release(); err != nil {
   368  							glog.Warningf("error in releasing buffer: %v", err)
   369  						}
   370  					}()
   371  					badger.KVToBuffer(doneKV, buf)
   372  
   373  					if err := ci.writer.Write(buf); err != nil {
    374  						glog.Warningf("error in writing to stream writer: %v", err)
   375  					}
   376  				}
   377  				lastStreamId = kv.StreamId
   378  				return nil
   379  
   380  			})
   381  			x.Check(err)
   382  			if err := kvBuf.Release(); err != nil {
   383  				glog.Warningf("error in releasing buffer: %v", err)
   384  			}
   385  		}
   386  	}
   387  
   388  	for req := range writerCh {
   389  		write(req)
   390  		req.wg.Wait()
   391  
   392  		count(req)
   393  	}
   394  
   395  	// Wait for split lists to be written to the temporary badger.
   396  	close(ci.splitCh)
   397  	tmpWg.Wait()
   398  }
   399  
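         // writeSplitLists streams the split lists out of the temporary DB and writes them to
         // the main DB, offsetting their stream IDs so they do not collide with the streams
         // already used for non-split lists.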
   400  func (r *reducer) writeSplitLists(db, tmpDb *badger.DB, writer *badger.StreamWriter) {
    401  	// baseStreamId is one past the max stream ID used while writing non-split lists.
   402  	baseStreamId := atomic.AddUint32(&r.streamId, 1)
   403  	stream := tmpDb.NewStreamAt(math.MaxUint64)
   404  	stream.LogPrefix = "copying split keys to main DB"
   405  	stream.Send = func(buf *z.Buffer) error {
   406  		kvs, err := badger.BufferToKVList(buf)
   407  		x.Check(err)
   408  
   409  		buf.Reset()
   410  		for _, kv := range kvs.Kv {
   411  			kv.StreamId += baseStreamId
   412  			badger.KVToBuffer(kv, buf)
   413  		}
   414  		x.Check(writer.Write(buf))
   415  		return nil
   416  	}
   417  	x.Check(stream.Orchestrate(context.Background()))
   418  }
   419  
   420  const limit = 2 << 30
   421  
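         // throttle blocks while the total size of buffers currently being encoded exceeds
         // limit (2 GiB), bounding memory usage during the reduce phase.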
   422  func (r *reducer) throttle() {
   423  	for {
   424  		sz := atomic.LoadInt64(&r.prog.numEncoding)
   425  		if sz < limit {
   426  			return
   427  		}
   428  		time.Sleep(time.Second)
   429  	}
   430  }
   431  
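         // bufferStats prints the size, entry count, and per-key frequency histogram of an
         // unusually large buffer.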
   432  func bufferStats(cbuf *z.Buffer) {
   433  	fmt.Printf("Found a buffer of size: %s\n", humanize.IBytes(uint64(cbuf.LenNoPadding())))
   434  
    435  	// Just check how many keys we have in this giant buffer.
   436  	keys := make(map[uint64]int64)
   437  	var numEntries int
   438  	if err := cbuf.SliceIterate(func(slice []byte) error {
   439  		me := MapEntry(slice)
   440  		keys[z.MemHash(me.Key())]++
   441  		numEntries++
   442  		return nil
   443  	}); err != nil {
   444  		glog.Errorf("error while iterating over buf: %v", err)
   445  		x.Check(err)
   446  	}
   447  
   448  	keyHist := z.NewHistogramData(z.HistogramBounds(10, 32))
   449  	for _, num := range keys {
   450  		keyHist.Update(num)
   451  	}
   452  	fmt.Printf("Num Entries: %d. Total keys: %d\n Histogram: %s\n",
   453  		numEntries, len(keys), keyHist.String())
   454  }
   455  
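         // getBuf returns a 64 MiB buffer that spills to a memory-mapped file under dir once
         // it grows past 1 GiB, with a hard cap of 64 GiB.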
   456  func getBuf(dir string) *z.Buffer {
   457  	return z.NewBuffer(64<<20, "Reducer.GetBuf").
   458  		WithAutoMmap(1<<30, filepath.Join(dir, bufferDir)).
   459  		WithMaxSize(64 << 30)
   460  }
   461  
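         // reduce drives one shard: a collector goroutine merges the map iterators partition
         // by partition into buffers of roughly 256 MiB, which are fanned out to the encoder
         // goroutines and, from there, to the single writer goroutine.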
   462  func (r *reducer) reduce(partitionKeys [][]byte, mapItrs []*mapIterator, ci *countIndexer) {
   463  	cpu := r.opt.NumGoroutines
   464  	fmt.Printf("Num Encoders: %d\n", cpu)
   465  	encoderCh := make(chan *encodeRequest, 2*cpu)
   466  	writerCh := make(chan *encodeRequest, 2*cpu)
   467  	encoderCloser := z.NewCloser(cpu)
   468  	for i := 0; i < cpu; i++ {
    469  		// Start listening for entries to encode.
    470  		// For the time being, let's lease 100 stream IDs for each encoder.
   471  		go r.encode(encoderCh, encoderCloser)
   472  	}
   473  	// Start listening to write the badger list.
   474  	writerCloser := z.NewCloser(1)
   475  	go r.startWriting(ci, writerCh, writerCloser)
   476  
   477  	sendReq := func(zbuf *z.Buffer) {
   478  		wg := new(sync.WaitGroup)
   479  		wg.Add(1)
   480  		req := &encodeRequest{
   481  			cbuf:     zbuf,
   482  			wg:       wg,
   483  			listCh:   make(chan *z.Buffer, 3),
   484  			splitCh:  ci.splitCh,
   485  			countBuf: getBuf(r.opt.TmpDir),
   486  		}
   487  		encoderCh <- req
   488  		writerCh <- req
   489  	}
   490  
   491  	ticker := time.NewTicker(time.Minute)
   492  	defer ticker.Stop()
   493  
   494  	buffers := make(chan *z.Buffer, 3)
   495  
   496  	go func() {
   497  		// Start collecting buffers.
   498  		hd := z.NewHistogramData(z.HistogramBounds(16, 40))
   499  		cbuf := getBuf(r.opt.TmpDir)
    500  		// Append nil as the final partition key so the remaining entries are collected.
   501  		partitionKeys = append(partitionKeys, nil)
   502  
   503  		for i := 0; i < len(partitionKeys); i++ {
   504  			pkey := partitionKeys[i]
   505  			for _, itr := range mapItrs {
   506  				itr.Next(cbuf, pkey)
   507  			}
   508  			if cbuf.LenNoPadding() < 256<<20 {
   509  				// Pick up more data.
   510  				continue
   511  			}
   512  
   513  			hd.Update(int64(cbuf.LenNoPadding()))
   514  			select {
   515  			case <-ticker.C:
   516  				fmt.Printf("Histogram of buffer sizes: %s\n", hd.String())
   517  			default:
   518  			}
   519  
   520  			buffers <- cbuf
   521  			cbuf = getBuf(r.opt.TmpDir)
   522  		}
   523  		if !cbuf.IsEmpty() {
   524  			hd.Update(int64(cbuf.LenNoPadding()))
   525  			buffers <- cbuf
   526  		} else {
   527  			if err := cbuf.Release(); err != nil {
   528  				glog.Warningf("error in releasing buffer: %v", err)
   529  			}
   530  		}
   531  		fmt.Printf("Final Histogram of buffer sizes: %s\n", hd.String())
   532  		close(buffers)
   533  	}()
   534  
   535  	for cbuf := range buffers {
   536  		if cbuf.LenNoPadding() > limit/2 {
   537  			bufferStats(cbuf)
   538  		}
   539  		r.throttle()
   540  
   541  		atomic.AddInt64(&r.prog.numEncoding, int64(cbuf.LenNoPadding()))
   542  		sendReq(cbuf)
   543  	}
   544  
    545  	// Close the encoders.
   546  	close(encoderCh)
   547  	encoderCloser.SignalAndWait()
   548  
   549  	// Close the writer.
   550  	close(writerCh)
   551  	writerCloser.SignalAndWait()
   552  }
   553  
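         // toList sorts the request's buffer, groups entries by key, and encodes each group
         // into a posting-list KV; lists larger than ~512 KiB are rolled up and split, with
         // the extra parts sent to splitCh. Count-index entries are collected into countBuf.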
   554  func (r *reducer) toList(req *encodeRequest) {
   555  	cbuf := req.cbuf
   556  	defer func() {
   557  		atomic.AddInt64(&r.prog.numEncoding, -int64(cbuf.LenNoPadding()))
   558  		if err := cbuf.Release(); err != nil {
   559  			glog.Warningf("error in releasing buffer: %v", err)
   560  		}
   561  	}()
   562  
   563  	cbuf.SortSlice(func(ls, rs []byte) bool {
   564  		lhs := MapEntry(ls)
   565  		rhs := MapEntry(rs)
   566  		return less(lhs, rhs)
   567  	})
   568  
   569  	var currentKey []byte
   570  	pl := new(pb.PostingList)
   571  	writeVersionTs := r.state.writeTs
   572  
   573  	kvBuf := z.NewBuffer(260<<20, "Reducer.Buffer.ToList")
   574  	trackCountIndex := make(map[string]bool)
   575  
   576  	var freePostings []*pb.Posting
   577  
   578  	getPosting := func() *pb.Posting {
   579  		if sz := len(freePostings); sz > 0 {
   580  			last := freePostings[sz-1]
   581  			freePostings = freePostings[:sz-1]
   582  			return last
   583  		}
   584  		return &pb.Posting{}
   585  	}
   586  
   587  	freePosting := func(p *pb.Posting) {
   588  		p.Reset()
   589  		freePostings = append(freePostings, p)
   590  	}
   591  
   592  	alloc := z.NewAllocator(16<<20, "Reducer.ToList")
   593  	defer func() {
   594  		// We put alloc.Release in defer because we reassign alloc for split posting lists.
   595  		alloc.Release()
   596  	}()
   597  
   598  	start, end, num := cbuf.StartOffset(), cbuf.StartOffset(), 0
   599  	appendToList := func() {
   600  		if num == 0 {
   601  			return
   602  		}
   603  		atomic.AddInt64(&r.prog.reduceEdgeCount, int64(num))
   604  
   605  		pk, err := x.Parse(currentKey)
   606  		x.Check(err)
   607  		x.AssertTrue(len(pk.Attr) > 0)
   608  
    609  		// Only data and reverse keys whose schema enables @count need count-index entries.
   610  		if pk.IsData() || pk.IsReverse() {
   611  			doCount, ok := trackCountIndex[pk.Attr]
   612  			if !ok {
   613  				doCount = r.schema.getSchema(pk.Attr).GetCount()
   614  				trackCountIndex[pk.Attr] = doCount
   615  			}
   616  			if doCount {
   617  				// Calculate count entries.
   618  				ck := x.CountKey(pk.Attr, uint32(num), pk.IsReverse())
   619  				dst := req.countBuf.SliceAllocate(countEntrySize(ck))
   620  				marshalCountEntry(dst, ck, pk.Uid)
   621  			}
   622  		}
   623  
   624  		alloc.Reset()
   625  		enc := codec.Encoder{BlockSize: 256, Alloc: alloc}
   626  		var lastUid uint64
   627  		var slice []byte
   628  		next := start
   629  		for next >= 0 && (next < end || end == -1) {
   630  			slice, next = cbuf.Slice(next)
   631  			me := MapEntry(slice)
   632  
   633  			uid := me.Uid()
   634  			if uid == lastUid {
   635  				continue
   636  			}
   637  			lastUid = uid
   638  
   639  			enc.Add(uid)
   640  			if pbuf := me.Plist(); len(pbuf) > 0 {
   641  				p := getPosting()
   642  				x.Check(p.Unmarshal(pbuf))
   643  				pl.Postings = append(pl.Postings, p)
   644  			}
   645  		}
   646  
   647  		// We should not do defer FreePack here, because we might be giving ownership of it away if
   648  		// we run Rollup.
   649  		pl.Pack = enc.Done()
   650  		numUids := codec.ExactLen(pl.Pack)
   651  
   652  		atomic.AddInt64(&r.prog.reduceKeyCount, 1)
   653  
   654  		// For a UID-only posting list, the badger value is a delta packed UID
   655  		// list. The UserMeta indicates to treat the value as a delta packed
   656  		// list when the value is read by dgraph.  For a value posting list,
    657  		// the full pb.Posting type is used (which itself contains the
    658  		// delta packed UID list).
   659  		if numUids == 0 {
    660  			// No need to FreePack here because we are reusing alloc.
   661  			return
   662  		}
   663  
   664  		// If the schema is of type uid and not a list but we have more than one uid in this
   665  		// list, we cannot enforce the constraint without losing data. Inform the user and
   666  		// force the schema to be a list so that all the data can be found when Dgraph is started.
   667  		// The user should fix their data once Dgraph is up.
   668  		parsedKey, err := x.Parse(currentKey)
   669  		x.Check(err)
   670  		if parsedKey.IsData() {
   671  			schema := r.state.schema.getSchema(parsedKey.Attr)
   672  			if schema.GetValueType() == pb.Posting_UID && !schema.GetList() && numUids > 1 {
    673  				fmt.Printf("Schema for pred %s specifies that this is not a list but more than "+
   674  					"one UID has been found. Forcing the schema to be a list to avoid any "+
   675  					"data loss. Please fix the data to your specifications once Dgraph is up.\n",
   676  					parsedKey.Attr)
   677  				r.state.schema.setSchemaAsList(parsedKey.Attr)
   678  			}
   679  		}
   680  
   681  		shouldSplit := pl.Size() > (1<<20)/2 && len(pl.Pack.Blocks) > 1
   682  		if shouldSplit {
   683  			// Give ownership of pl.Pack away to list. Rollup would deallocate the Pack.
   684  			l := posting.NewList(y.Copy(currentKey), pl, writeVersionTs)
   685  			kvs, err := l.Rollup(nil)
   686  			x.Check(err)
   687  
   688  			// Assign a new allocator, so we don't reset the one we were using during Rollup.
   689  			alloc = z.NewAllocator(16<<20, "Reducer.AppendToList")
   690  
   691  			for _, kv := range kvs {
   692  				kv.StreamId = r.streamIdFor(pk.Attr)
   693  			}
   694  			badger.KVToBuffer(kvs[0], kvBuf)
   695  			if splits := kvs[1:]; len(splits) > 0 {
   696  				req.splitCh <- &bpb.KVList{Kv: splits}
   697  			}
   698  		} else {
   699  			kv := posting.MarshalPostingList(pl, nil)
   700  			// No need to FreePack here, because we are reusing alloc.
   701  
   702  			kv.Key = y.Copy(currentKey)
   703  			kv.Version = writeVersionTs
   704  			kv.StreamId = r.streamIdFor(pk.Attr)
   705  			badger.KVToBuffer(kv, kvBuf)
   706  		}
   707  
   708  		for _, p := range pl.Postings {
   709  			freePosting(p)
   710  		}
   711  		pl.Reset()
   712  	}
   713  
   714  	for end >= 0 {
   715  		slice, next := cbuf.Slice(end)
   716  		entry := MapEntry(slice)
   717  		entryKey := entry.Key()
   718  
   719  		if !bytes.Equal(entryKey, currentKey) && currentKey != nil {
   720  			appendToList()
    721  			start, num = end, 0 // The next list starts from the current entry.
   722  
   723  			if kvBuf.LenNoPadding() > 256<<20 {
   724  				req.listCh <- kvBuf
   725  				kvBuf = z.NewBuffer(260<<20, "Reducer.Buffer.KVBuffer")
   726  			}
   727  		}
   728  		end = next
   729  		currentKey = append(currentKey[:0], entryKey...)
   730  		num++
   731  	}
   732  
   733  	appendToList()
   734  	if kvBuf.LenNoPadding() > 0 {
   735  		req.listCh <- kvBuf
   736  	} else {
   737  		if err := kvBuf.Release(); err != nil {
   738  			glog.Warningf("error in releasing buffer: %v", err)
   739  		}
   740  	}
   741  	close(req.listCh)
   742  
    743  	// Sort countBuf here, in the encoder goroutine, so the single writer goroutine does not have to.
   744  	req.countBuf.SortSlice(func(ls, rs []byte) bool {
   745  		left := countEntry(ls)
   746  		right := countEntry(rs)
   747  		return left.less(right)
   748  	})
   749  }