github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/reduce.go

/*
 * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package bulk

import (
	"bufio"
	"bytes"
	"context"
	"encoding/binary"
	"fmt"
	"io"
	"log"
	"math"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"sync"
	"sync/atomic"
	"time"

	"github.com/dustin/go-humanize"
	"github.com/golang/glog"
	"github.com/golang/snappy"

	"github.com/dgraph-io/badger/v3"
	bo "github.com/dgraph-io/badger/v3/options"
	bpb "github.com/dgraph-io/badger/v3/pb"
	"github.com/dgraph-io/badger/v3/y"
	"github.com/dgraph-io/dgraph/codec"
	"github.com/dgraph-io/dgraph/posting"
	"github.com/dgraph-io/dgraph/protos/pb"
	"github.com/dgraph-io/dgraph/x"
	"github.com/dgraph-io/ristretto/z"
)

type reducer struct {
	*state
	streamId  uint32
	mu        sync.RWMutex
	streamIds map[string]uint32
}

func (r *reducer) run() error {
	dirs := readShardDirs(filepath.Join(r.opt.TmpDir, reduceShardDir))
	x.AssertTrue(len(dirs) == r.opt.ReduceShards)
	x.AssertTrue(len(r.opt.shardOutputDirs) == r.opt.ReduceShards)

	thr := y.NewThrottle(r.opt.NumReducers)
	for i := 0; i < r.opt.ReduceShards; i++ {
		if err := thr.Do(); err != nil {
			return err
		}
		go func(shardId int, db *badger.DB, tmpDb *badger.DB) {
			defer thr.Done(nil)

			mapFiles := filenamesInTree(dirs[shardId])
			var mapItrs []*mapIterator

			// Dedup the partition keys.
			partitions := make(map[string]struct{})
			for _, mapFile := range mapFiles {
				header, itr := newMapIterator(mapFile)
				for _, k := range header.PartitionKeys {
					if len(k) == 0 {
						continue
					}
					partitions[string(k)] = struct{}{}
				}
				mapItrs = append(mapItrs, itr)
			}

			writer := db.NewStreamWriter()
			x.Check(writer.Prepare())
			// Split lists are written to a separate DB first to avoid ordering issues.
			splitWriter := tmpDb.NewManagedWriteBatch()

			ci := &countIndexer{
				reducer:     r,
				writer:      writer,
				splitWriter: splitWriter,
				tmpDb:       tmpDb,
				splitCh:     make(chan *bpb.KVList, 2*runtime.NumCPU()),
				countBuf:    getBuf(r.opt.TmpDir),
			}

			partitionKeys := make([][]byte, 0, len(partitions))
			for k := range partitions {
				partitionKeys = append(partitionKeys, []byte(k))
			}
			sort.Slice(partitionKeys, func(i, j int) bool {
				return bytes.Compare(partitionKeys[i], partitionKeys[j]) < 0
			})

			r.reduce(partitionKeys, mapItrs, ci)
			ci.wait()

			fmt.Println("Writing split lists back to the main DB now")
			// Write split lists back to the main DB.
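			// writeSplitLists streams the split keys back out of tmpDb and re-writes
			// them into the main DB, offsetting their stream IDs past the ones already
			// handed out by streamIdFor above.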
			r.writeSplitLists(db, tmpDb, writer)

			x.Check(writer.Flush())

			for _, itr := range mapItrs {
				if err := itr.Close(); err != nil {
					fmt.Printf("Error while closing iterator: %v", err)
				}
			}
		}(i, r.createBadger(i), r.createTmpBadger())
	}
	return thr.Finish()
}

func (r *reducer) createBadgerInternal(dir string, compression bool) *badger.DB {
	key := r.opt.EncryptionKey
	if !r.opt.EncryptedOut {
		key = nil
	}

	opt := r.state.opt.Badger.
		WithDir(dir).WithValueDir(dir).
		WithSyncWrites(false).
		WithEncryptionKey(key)

	opt.Compression = bo.None
	opt.ZSTDCompressionLevel = 0
	// Overwrite badger options based on the options provided by the user.
	if compression {
		opt.Compression = r.state.opt.Badger.Compression
		opt.ZSTDCompressionLevel = r.state.opt.Badger.ZSTDCompressionLevel
	}

	db, err := badger.OpenManaged(opt)
	x.Check(err)

	// Zero out the key from memory.
	opt.EncryptionKey = nil
	return db
}

func (r *reducer) createBadger(i int) *badger.DB {
	db := r.createBadgerInternal(r.opt.shardOutputDirs[i], true)
	r.dbs = append(r.dbs, db)
	return db
}

func (r *reducer) createTmpBadger() *badger.DB {
	tmpDir, err := os.MkdirTemp(r.opt.TmpDir, "split")
	x.Check(err)
	// Do not enable compression in the temporary badger to improve performance.
	db := r.createBadgerInternal(tmpDir, false)
	r.tmpDbs = append(r.tmpDbs, db)
	return db
}

type mapIterator struct {
	fd     *os.File
	reader *bufio.Reader
	meBuf  []byte
}

func (mi *mapIterator) Next(cbuf *z.Buffer, partitionKey []byte) {
	readMapEntry := func() error {
		if len(mi.meBuf) > 0 {
			return nil
		}
		r := mi.reader
		sizeBuf, err := r.Peek(binary.MaxVarintLen64)
		if err != nil {
			return err
		}
		sz, n := binary.Uvarint(sizeBuf)
		if n <= 0 {
			log.Fatalf("Could not read uvarint: %d", n)
		}
		x.Check2(r.Discard(n))
		if cap(mi.meBuf) < int(sz) {
			mi.meBuf = make([]byte, int(sz))
		}
		mi.meBuf = mi.meBuf[:int(sz)]
		x.Check2(io.ReadFull(r, mi.meBuf))
		return nil
	}
	for {
		if err := readMapEntry(); err == io.EOF {
			break
		} else {
			x.Check(err)
		}
		key := MapEntry(mi.meBuf).Key()

		if len(partitionKey) == 0 || bytes.Compare(key, partitionKey) < 0 {
			b := cbuf.SliceAllocate(len(mi.meBuf))
			copy(b, mi.meBuf)
			mi.meBuf = mi.meBuf[:0]
			// The map entry is now part of cbuf.
			continue
		}
		// The current key is not part of this batch; keep it in meBuf so the
		// next call picks it up instead of re-reading it.
		return
	}
}

func (mi *mapIterator) Close() error {
	return mi.fd.Close()
}

func newMapIterator(filename string) (*pb.MapHeader, *mapIterator) {
	fd, err := os.Open(filename)
	x.Check(err)
	r := snappy.NewReader(fd)

	// Read the header size.
	reader := bufio.NewReaderSize(r, 16<<10)
	headerLenBuf := make([]byte, 4)
	x.Check2(io.ReadFull(reader, headerLenBuf))
	headerLen := binary.BigEndian.Uint32(headerLenBuf)
	// Read the map header.
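	// The header carries the partition keys emitted during the map phase; run()
	// dedups and sorts them, and reduce() uses them to cut the merged map stream
	// into batches.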
	headerBuf := make([]byte, headerLen)

	x.Check2(io.ReadFull(reader, headerBuf))
	header := &pb.MapHeader{}
	err = header.Unmarshal(headerBuf)
	x.Check(err)

	itr := &mapIterator{
		fd:     fd,
		reader: reader,
	}
	return header, itr
}

type encodeRequest struct {
	cbuf     *z.Buffer
	countBuf *z.Buffer
	wg       *sync.WaitGroup
	listCh   chan *z.Buffer
	splitCh  chan *bpb.KVList
}

func (r *reducer) streamIdFor(pred string) uint32 {
	r.mu.RLock()
	if id, ok := r.streamIds[pred]; ok {
		r.mu.RUnlock()
		return id
	}
	r.mu.RUnlock()
	r.mu.Lock()
	defer r.mu.Unlock()
	if id, ok := r.streamIds[pred]; ok {
		return id
	}
	streamId := atomic.AddUint32(&r.streamId, 1)
	r.streamIds[pred] = streamId
	return streamId
}

func (r *reducer) encode(entryCh chan *encodeRequest, closer *z.Closer) {
	defer closer.Done()

	for req := range entryCh {
		r.toList(req)
		req.wg.Done()
	}
}

const maxSplitBatchLen = 1000

func (r *reducer) writeTmpSplits(ci *countIndexer, wg *sync.WaitGroup) {
	defer wg.Done()
	splitBatchLen := 0

	for kvs := range ci.splitCh {
		if kvs == nil || len(kvs.Kv) == 0 {
			continue
		}

		for i := 0; i < len(kvs.Kv); i += maxSplitBatchLen {
			// Flush the write batch when the max batch length is reached to prevent the
			// value log from growing over the allowed limit.
			if splitBatchLen >= maxSplitBatchLen {
				x.Check(ci.splitWriter.Flush())
				ci.splitWriter = ci.tmpDb.NewManagedWriteBatch()
				splitBatchLen = 0
			}

			batch := &bpb.KVList{}
			if i+maxSplitBatchLen >= len(kvs.Kv) {
				batch.Kv = kvs.Kv[i:]
			} else {
				batch.Kv = kvs.Kv[i : i+maxSplitBatchLen]
			}
			splitBatchLen += len(batch.Kv)
			x.Check(ci.splitWriter.WriteList(batch))
		}
	}
	x.Check(ci.splitWriter.Flush())
}

func (r *reducer) startWriting(ci *countIndexer, writerCh chan *encodeRequest, closer *z.Closer) {
	defer closer.Done()

	// Concurrently write split lists to a temporary badger.
	tmpWg := new(sync.WaitGroup)
	tmpWg.Add(1)
	go r.writeTmpSplits(ci, tmpWg)

	count := func(req *encodeRequest) {
		defer func() {
			if err := req.countBuf.Release(); err != nil {
				glog.Warningf("error in releasing buffer: %v", err)
			}
		}()
		if req.countBuf.IsEmpty() {
			return
		}

		// req.countBuf is already sorted.
		sz := req.countBuf.LenNoPadding()
		ci.countBuf.Grow(sz)

		if err := req.countBuf.SliceIterate(func(slice []byte) error {
			ce := countEntry(slice)
			ci.addCountEntry(ce)
			return nil
		}); err != nil {
			glog.Errorf("error while iterating over buf: %v", err)
			x.Check(err)
		}
	}

	var lastStreamId uint32
	write := func(req *encodeRequest) {
		for kvBuf := range req.listCh {
			x.Check(ci.writer.Write(kvBuf))

			kv := &bpb.KV{}
			err := kvBuf.SliceIterate(func(s []byte) error {
				kv.Reset()
				x.Check(kv.Unmarshal(s))
				if lastStreamId == kv.StreamId {
					return nil
				}
				if lastStreamId > 0 {
					fmt.Printf("Finishing stream id: %d\n", lastStreamId)
					doneKV := &bpb.KV{
						StreamId:   lastStreamId,
						StreamDone: true,
					}

					buf := z.NewBuffer(512, "Reducer.Write")
					defer func() {
						if err := buf.Release(); err != nil {
							glog.Warningf("error in releasing buffer: %v", err)
						}
					}()
					badger.KVToBuffer(doneKV, buf)

					if err := ci.writer.Write(buf); err != nil {
						glog.Warningf("error in writing done marker to stream writer: %v", err)
					}
				}
				lastStreamId = kv.StreamId
				return nil

			})
			x.Check(err)
			if err := kvBuf.Release(); err != nil {
				glog.Warningf("error in releasing buffer: %v", err)
			}
		}
	}

	for req := range writerCh {
		write(req)
		req.wg.Wait()

		count(req)
	}

	// Wait for split lists to be written to the temporary badger.
	close(ci.splitCh)
	tmpWg.Wait()
}

func (r *reducer) writeSplitLists(db, tmpDb *badger.DB, writer *badger.StreamWriter) {
	// baseStreamId is one past the highest stream ID handed out while writing
	// non-split lists, so the re-streamed split keys never collide with them.
	baseStreamId := atomic.AddUint32(&r.streamId, 1)
	stream := tmpDb.NewStreamAt(math.MaxUint64)
	stream.LogPrefix = "copying split keys to main DB"
	stream.Send = func(buf *z.Buffer) error {
		kvs, err := badger.BufferToKVList(buf)
		x.Check(err)

		buf.Reset()
		for _, kv := range kvs.Kv {
			kv.StreamId += baseStreamId
			badger.KVToBuffer(kv, buf)
		}
		x.Check(writer.Write(buf))
		return nil
	}
	x.Check(stream.Orchestrate(context.Background()))
}

const limit = 2 << 30

func (r *reducer) throttle() {
	for {
		sz := atomic.LoadInt64(&r.prog.numEncoding)
		if sz < limit {
			return
		}
		time.Sleep(time.Second)
	}
}

func bufferStats(cbuf *z.Buffer) {
	fmt.Printf("Found a buffer of size: %s\n", humanize.IBytes(uint64(cbuf.LenNoPadding())))

	// Just check how many keys we have in this giant buffer.
	keys := make(map[uint64]int64)
	var numEntries int
	if err := cbuf.SliceIterate(func(slice []byte) error {
		me := MapEntry(slice)
		keys[z.MemHash(me.Key())]++
		numEntries++
		return nil
	}); err != nil {
		glog.Errorf("error while iterating over buf: %v", err)
		x.Check(err)
	}

	keyHist := z.NewHistogramData(z.HistogramBounds(10, 32))
	for _, num := range keys {
		keyHist.Update(num)
	}
	fmt.Printf("Num Entries: %d. Total keys: %d\n Histogram: %s\n",
		numEntries, len(keys), keyHist.String())
}

func getBuf(dir string) *z.Buffer {
	return z.NewBuffer(64<<20, "Reducer.GetBuf").
		WithAutoMmap(1<<30, filepath.Join(dir, bufferDir)).
		WithMaxSize(64 << 30)
}

func (r *reducer) reduce(partitionKeys [][]byte, mapItrs []*mapIterator, ci *countIndexer) {
	cpu := r.opt.NumGoroutines
	fmt.Printf("Num Encoders: %d\n", cpu)
	encoderCh := make(chan *encodeRequest, 2*cpu)
	writerCh := make(chan *encodeRequest, 2*cpu)
	encoderCloser := z.NewCloser(cpu)
	for i := 0; i < cpu; i++ {
		// Start listening for entries to encode.
		go r.encode(encoderCh, encoderCloser)
	}
	// Start the writer goroutine that writes the badger lists.
	writerCloser := z.NewCloser(1)
	go r.startWriting(ci, writerCh, writerCloser)

	sendReq := func(zbuf *z.Buffer) {
		wg := new(sync.WaitGroup)
		wg.Add(1)
		req := &encodeRequest{
			cbuf:     zbuf,
			wg:       wg,
			listCh:   make(chan *z.Buffer, 3),
			splitCh:  ci.splitCh,
			countBuf: getBuf(r.opt.TmpDir),
		}
		encoderCh <- req
		writerCh <- req
	}

	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	buffers := make(chan *z.Buffer, 3)

	go func() {
		// Start collecting buffers.
		hd := z.NewHistogramData(z.HistogramBounds(16, 40))
		cbuf := getBuf(r.opt.TmpDir)
		// Append nil so the entries after the last partition key are picked up too.
		partitionKeys = append(partitionKeys, nil)

		for i := 0; i < len(partitionKeys); i++ {
			pkey := partitionKeys[i]
			for _, itr := range mapItrs {
				itr.Next(cbuf, pkey)
			}
			if cbuf.LenNoPadding() < 256<<20 {
				// Pick up more data.
				continue
			}

			hd.Update(int64(cbuf.LenNoPadding()))
			select {
			case <-ticker.C:
				fmt.Printf("Histogram of buffer sizes: %s\n", hd.String())
			default:
			}

			buffers <- cbuf
			cbuf = getBuf(r.opt.TmpDir)
		}
		if !cbuf.IsEmpty() {
			hd.Update(int64(cbuf.LenNoPadding()))
			buffers <- cbuf
		} else {
			if err := cbuf.Release(); err != nil {
				glog.Warningf("error in releasing buffer: %v", err)
			}
		}
		fmt.Printf("Final Histogram of buffer sizes: %s\n", hd.String())
		close(buffers)
	}()

	for cbuf := range buffers {
		if cbuf.LenNoPadding() > limit/2 {
			bufferStats(cbuf)
		}
		r.throttle()

		atomic.AddInt64(&r.prog.numEncoding, int64(cbuf.LenNoPadding()))
		sendReq(cbuf)
	}

	// Close the encoders.
	close(encoderCh)
	encoderCloser.SignalAndWait()

	// Close the writer.
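	// startWriting returns only after writerCh is drained; it then closes
	// ci.splitCh and waits for writeTmpSplits to flush the temporary badger.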
	close(writerCh)
	writerCloser.SignalAndWait()
}

func (r *reducer) toList(req *encodeRequest) {
	cbuf := req.cbuf
	defer func() {
		atomic.AddInt64(&r.prog.numEncoding, -int64(cbuf.LenNoPadding()))
		if err := cbuf.Release(); err != nil {
			glog.Warningf("error in releasing buffer: %v", err)
		}
	}()

	cbuf.SortSlice(func(ls, rs []byte) bool {
		lhs := MapEntry(ls)
		rhs := MapEntry(rs)
		return less(lhs, rhs)
	})

	var currentKey []byte
	pl := new(pb.PostingList)
	writeVersionTs := r.state.writeTs

	kvBuf := z.NewBuffer(260<<20, "Reducer.Buffer.ToList")
	trackCountIndex := make(map[string]bool)

	var freePostings []*pb.Posting

	getPosting := func() *pb.Posting {
		if sz := len(freePostings); sz > 0 {
			last := freePostings[sz-1]
			freePostings = freePostings[:sz-1]
			return last
		}
		return &pb.Posting{}
	}

	freePosting := func(p *pb.Posting) {
		p.Reset()
		freePostings = append(freePostings, p)
	}

	alloc := z.NewAllocator(16<<20, "Reducer.ToList")
	defer func() {
		// We put alloc.Release in a defer because we reassign alloc for split posting lists.
		alloc.Release()
	}()

	start, end, num := cbuf.StartOffset(), cbuf.StartOffset(), 0
	appendToList := func() {
		if num == 0 {
			return
		}
		atomic.AddInt64(&r.prog.reduceEdgeCount, int64(num))

		pk, err := x.Parse(currentKey)
		x.Check(err)
		x.AssertTrue(len(pk.Attr) > 0)

		// We might not need to track the count index every time.
		if pk.IsData() || pk.IsReverse() {
			doCount, ok := trackCountIndex[pk.Attr]
			if !ok {
				doCount = r.schema.getSchema(pk.Attr).GetCount()
				trackCountIndex[pk.Attr] = doCount
			}
			if doCount {
				// Calculate count entries.
				ck := x.CountKey(pk.Attr, uint32(num), pk.IsReverse())
				dst := req.countBuf.SliceAllocate(countEntrySize(ck))
				marshalCountEntry(dst, ck, pk.Uid)
			}
		}

		alloc.Reset()
		enc := codec.Encoder{BlockSize: 256, Alloc: alloc}
		var lastUid uint64
		var slice []byte
		next := start
		for next >= 0 && (next < end || end == -1) {
			slice, next = cbuf.Slice(next)
			me := MapEntry(slice)

			uid := me.Uid()
			if uid == lastUid {
				continue
			}
			lastUid = uid

			enc.Add(uid)
			if pbuf := me.Plist(); len(pbuf) > 0 {
				p := getPosting()
				x.Check(p.Unmarshal(pbuf))
				pl.Postings = append(pl.Postings, p)
			}
		}

		// We should not defer FreePack here, because we might be giving ownership of it away if
		// we run Rollup.
		pl.Pack = enc.Done()
		numUids := codec.ExactLen(pl.Pack)

		atomic.AddInt64(&r.prog.reduceKeyCount, 1)

		// For a UID-only posting list, the badger value is a delta packed UID
		// list. The UserMeta indicates to treat the value as a delta packed
		// list when the value is read by dgraph. For a value posting list,
		// the full pb.Posting type is used (which also contains the
		// delta packed UID list).
		if numUids == 0 {
			// No need to FreePack here because we are reusing alloc.
			return
		}

		// If the schema is of type uid and not a list but we have more than one uid in this
		// list, we cannot enforce the constraint without losing data. Inform the user and
		// force the schema to be a list so that all the data can be found when Dgraph is started.
		// The user should fix their data once Dgraph is up.
		parsedKey, err := x.Parse(currentKey)
		x.Check(err)
		if parsedKey.IsData() {
			schema := r.state.schema.getSchema(parsedKey.Attr)
			if schema.GetValueType() == pb.Posting_UID && !schema.GetList() && numUids > 1 {
				fmt.Printf("Schema for pred %s specifies that this is not a list but more than "+
					"one UID has been found. Forcing the schema to be a list to avoid any "+
					"data loss. Please fix the data to your specifications once Dgraph is up.\n",
					parsedKey.Attr)
				r.state.schema.setSchemaAsList(parsedKey.Attr)
			}
		}

		shouldSplit := pl.Size() > (1<<20)/2 && len(pl.Pack.Blocks) > 1
		if shouldSplit {
			// Give ownership of pl.Pack away to the list. Rollup would deallocate the Pack.
			l := posting.NewList(y.Copy(currentKey), pl, writeVersionTs)
			kvs, err := l.Rollup(nil)
			x.Check(err)

			// Assign a new allocator, so we don't reset the one we were using during Rollup.
			alloc = z.NewAllocator(16<<20, "Reducer.AppendToList")

			for _, kv := range kvs {
				kv.StreamId = r.streamIdFor(pk.Attr)
			}
			badger.KVToBuffer(kvs[0], kvBuf)
			if splits := kvs[1:]; len(splits) > 0 {
				req.splitCh <- &bpb.KVList{Kv: splits}
			}
		} else {
			kv := posting.MarshalPostingList(pl, nil)
			// No need to FreePack here, because we are reusing alloc.

			kv.Key = y.Copy(currentKey)
			kv.Version = writeVersionTs
			kv.StreamId = r.streamIdFor(pk.Attr)
			badger.KVToBuffer(kv, kvBuf)
		}

		for _, p := range pl.Postings {
			freePosting(p)
		}
		pl.Reset()
	}

	for end >= 0 {
		slice, next := cbuf.Slice(end)
		entry := MapEntry(slice)
		entryKey := entry.Key()

		if !bytes.Equal(entryKey, currentKey) && currentKey != nil {
			appendToList()
			start, num = end, 0 // The next list starts from the current entry.

			if kvBuf.LenNoPadding() > 256<<20 {
				req.listCh <- kvBuf
				kvBuf = z.NewBuffer(260<<20, "Reducer.Buffer.KVBuffer")
			}
		}
		end = next
		currentKey = append(currentKey[:0], entryKey...)
		num++
	}

	appendToList()
	if kvBuf.LenNoPadding() > 0 {
		req.listCh <- kvBuf
	} else {
		if err := kvBuf.Release(); err != nil {
			glog.Warningf("error in releasing buffer: %v", err)
		}
	}
	close(req.listCh)

	// Sort countBuf before returning so the writer sees sorted input and the
	// sorting work stays on the encoder goroutines.
	req.countBuf.SortSlice(func(ls, rs []byte) bool {
		left := countEntry(ls)
		right := countEntry(rs)
		return left.less(right)
	})
}
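// The framing that newMapIterator and mapIterator.Next expect (inferred from the
// reading code above; the map-phase writer is not in this file): each map file is
// one snappy-compressed stream laid out as a 4-byte big-endian header length, a
// marshalled pb.MapHeader carrying the partition keys, and then MapEntry records,
// each prefixed with its size as a uvarint. A minimal standalone reader sketch
// under those assumptions (visitMapFile and its visit callback are hypothetical,
// and error handling is reduced to returning the first error):
//
//	func visitMapFile(path string, visit func(MapEntry)) error {
//		fd, err := os.Open(path)
//		if err != nil {
//			return err
//		}
//		defer fd.Close()
//
//		r := bufio.NewReaderSize(snappy.NewReader(fd), 16<<10)
//		lenBuf := make([]byte, 4)
//		if _, err := io.ReadFull(r, lenBuf); err != nil {
//			return err
//		}
//		header := make([]byte, binary.BigEndian.Uint32(lenBuf))
//		if _, err := io.ReadFull(r, header); err != nil {
//			return err
//		}
//		// The pb.MapHeader bytes are skipped here; only the entries are visited.
//		for {
//			sizeBuf, err := r.Peek(binary.MaxVarintLen64)
//			if err == io.EOF {
//				return nil
//			} else if err != nil {
//				return err
//			}
//			sz, n := binary.Uvarint(sizeBuf)
//			if _, err := r.Discard(n); err != nil {
//				return err
//			}
//			buf := make([]byte, sz)
//			if _, err := io.ReadFull(r, buf); err != nil {
//				return err
//			}
//			visit(MapEntry(buf))
//		}
//	}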