github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/count_index.go (about) 1 /* 2 * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package bulk 18 19 import ( 20 "bytes" 21 "encoding/binary" 22 "fmt" 23 "sync" 24 "sync/atomic" 25 26 "github.com/golang/glog" 27 28 "github.com/dgraph-io/badger/v3" 29 "github.com/dgraph-io/dgraph/codec" 30 "github.com/dgraph-io/dgraph/posting" 31 "github.com/dgraph-io/dgraph/protos/pb" 32 "github.com/dgraph-io/dgraph/x" 33 "github.com/dgraph-io/ristretto/z" 34 ) 35 36 // type countEntry struct { 37 // uid uint64 38 // key []byte 39 // } 40 41 type countEntry []byte 42 43 func countEntrySize(key []byte) int { 44 return 8 + 4 + len(key) 45 } 46 func marshalCountEntry(dst, key []byte, uid uint64) { 47 binary.BigEndian.PutUint64(dst[0:8], uid) 48 49 binary.BigEndian.PutUint32(dst[8:12], uint32(len(key))) 50 n := copy(dst[12:], key) 51 x.AssertTrue(len(dst) == n+12) 52 } 53 func (ci countEntry) Uid() uint64 { 54 return binary.BigEndian.Uint64(ci[0:8]) 55 } 56 func (ci countEntry) Key() []byte { 57 sz := binary.BigEndian.Uint32(ci[8:12]) 58 return ci[12 : 12+sz] 59 } 60 func (ci countEntry) less(oe countEntry) bool { 61 lk, rk := ci.Key(), oe.Key() 62 if cmp := bytes.Compare(lk, rk); cmp != 0 { 63 return cmp < 0 64 } 65 return ci.Uid() < oe.Uid() 66 } 67 68 type current struct { 69 pred string 70 rev bool 71 track bool 72 } 73 74 type countIndexer struct { 75 *reducer 76 writer *badger.StreamWriter 77 splitWriter *badger.WriteBatch 78 splitCh chan *badger.KVList 79 tmpDb *badger.DB 80 cur current 81 countBuf *z.Buffer 82 wg sync.WaitGroup 83 } 84 85 // addUid adds the uid from rawKey to a count index if a count index is 86 // required by the schema. This method expects keys to be passed into it in 87 // sorted order. 88 func (c *countIndexer) addCountEntry(ce countEntry) { 89 pk, err := x.Parse(ce.Key()) 90 x.Check(err) 91 92 sameIndexKey := pk.Attr == c.cur.pred && pk.IsReverse() == c.cur.rev 93 if sameIndexKey && !c.cur.track { 94 return 95 } 96 97 if !sameIndexKey { 98 if c.countBuf.LenNoPadding() > 0 { 99 c.wg.Add(1) 100 go c.writeIndex(c.countBuf) 101 c.countBuf = getBuf(c.opt.TmpDir) 102 } 103 c.cur.pred = pk.Attr 104 c.cur.rev = pk.IsReverse() 105 c.cur.track = c.schema.getSchema(pk.Attr).GetCount() 106 } 107 if c.cur.track { 108 dst := c.countBuf.SliceAllocate(len(ce)) 109 copy(dst, ce) 110 } 111 } 112 113 func (c *countIndexer) writeIndex(buf *z.Buffer) { 114 defer func() { 115 c.wg.Done() 116 if err := buf.Release(); err != nil { 117 glog.Warningf("error in releasing buffer: %v", err) 118 } 119 120 }() 121 if buf.IsEmpty() { 122 return 123 } 124 125 streamId := atomic.AddUint32(&c.streamId, 1) 126 buf.SortSlice(func(ls, rs []byte) bool { 127 left := countEntry(ls) 128 right := countEntry(rs) 129 return left.less(right) 130 }) 131 132 tmp, _ := buf.Slice(buf.StartOffset()) 133 lastCe := countEntry(tmp) 134 { 135 pk, err := x.Parse(lastCe.Key()) 136 x.Check(err) 137 fmt.Printf("Writing count index for %q rev=%v\n", pk.Attr, pk.IsReverse()) 138 } 139 140 alloc := z.NewAllocator(8<<20, "CountIndexer.WriteIndex") 141 defer alloc.Release() 142 143 var pl pb.PostingList 144 encoder := codec.Encoder{BlockSize: 256, Alloc: alloc} 145 146 outBuf := z.NewBuffer(5<<20, "CountIndexer.Buffer.WriteIndex") 147 defer func() { 148 if err := outBuf.Release(); err != nil { 149 glog.Warningf("error in releasing buffer: %v", err) 150 } 151 }() 152 encode := func() { 153 pl.Pack = encoder.Done() 154 if codec.ExactLen(pl.Pack) == 0 { 155 return 156 } 157 158 kv := posting.MarshalPostingList(&pl, nil) 159 kv.Key = append([]byte{}, lastCe.Key()...) 160 kv.Version = c.state.writeTs 161 kv.StreamId = streamId 162 badger.KVToBuffer(kv, outBuf) 163 164 alloc.Reset() 165 encoder = codec.Encoder{BlockSize: 256, Alloc: alloc} 166 pl.Reset() 167 168 // flush out the buffer. 169 if outBuf.LenNoPadding() > 4<<20 { 170 x.Check(c.writer.Write(outBuf)) 171 outBuf.Reset() 172 } 173 } 174 175 if err := buf.SliceIterate(func(slice []byte) error { 176 ce := countEntry(slice) 177 if !bytes.Equal(lastCe.Key(), ce.Key()) { 178 encode() 179 } 180 encoder.Add(ce.Uid()) 181 lastCe = ce 182 return nil 183 }); err != nil { 184 glog.Errorf("error while counting in buf: %v\n", err) 185 x.Check(err) 186 } 187 encode() 188 x.Check(c.writer.Write(outBuf)) 189 } 190 191 func (c *countIndexer) wait() { 192 if c.countBuf.LenNoPadding() > 0 { 193 c.wg.Add(1) 194 go c.writeIndex(c.countBuf) 195 } else { 196 if err := c.countBuf.Release(); err != nil { 197 glog.Warningf("error in releasing buffer: %v", err) 198 } 199 } 200 c.wg.Wait() 201 }