// github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/bulk/count_index.go

/*
 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package bulk

import (
	"bytes"
	"encoding/hex"
	"fmt"
	"sort"
	"sync"
	"sync/atomic"

	"github.com/dgraph-io/badger"
	bpb "github.com/dgraph-io/badger/pb"
	"github.com/dgraph-io/dgraph/codec"
	"github.com/dgraph-io/dgraph/posting"
	"github.com/dgraph-io/dgraph/protos/pb"
	"github.com/dgraph-io/dgraph/x"
)

// current identifies the (predicate, direction) group whose keys are being
// accumulated by a countIndexer at the moment.
type current struct {
	// pred is the Attr of the most recently seen key.
	pred string
	// rev is the IsReverse() value of the most recently seen key.
	rev bool
	// track caches the schema's GetCount() answer for pred; when false, keys
	// of this group are ignored.
	track bool
}

// countIndexer accumulates uids per count value for one (predicate,
// direction) group at a time and hands each finished group to a background
// goroutine that writes the count-index entries.
type countIndexer struct {
	// The embedded reducer supplies the schema, state and streamId that the
	// methods of this type read through promotion.
	*reducer
	// writer receives the finished key/value lists.
	writer *badger.StreamWriter
	// cur describes the group that counts currently belongs to.
	cur current
	// counts maps a count value to the uids that were reported with it.
	counts map[int][]uint64
	// wg tracks the in-flight writeIndex goroutines.
	wg sync.WaitGroup
}

// addUid adds the uid from rawKey to a count index if a count index is
// required by the schema. This method expects keys to be passed into it in
// sorted order.
52 func (c *countIndexer) addUid(rawKey []byte, count int) { 53 key, err := x.Parse(rawKey) 54 if err != nil { 55 fmt.Printf("Error while parsing key %s: %v\n", hex.Dump(rawKey), err) 56 return 57 } 58 if !key.IsData() && !key.IsReverse() { 59 return 60 } 61 sameIndexKey := key.Attr == c.cur.pred && key.IsReverse() == c.cur.rev 62 if sameIndexKey && !c.cur.track { 63 return 64 } 65 66 if !sameIndexKey { 67 if len(c.counts) > 0 { 68 c.wg.Add(1) 69 go c.writeIndex(c.cur.pred, c.cur.rev, c.counts) 70 } 71 if len(c.counts) > 0 || c.counts == nil { 72 c.counts = make(map[int][]uint64) 73 } 74 c.cur.pred = key.Attr 75 c.cur.rev = key.IsReverse() 76 c.cur.track = c.schema.getSchema(key.Attr).GetCount() 77 } 78 if c.cur.track { 79 c.counts[count] = append(c.counts[count], key.Uid) 80 } 81 } 82 83 func (c *countIndexer) writeIndex(pred string, rev bool, counts map[int][]uint64) { 84 defer c.wg.Done() 85 86 streamId := atomic.AddUint32(&c.streamId, 1) 87 list := &bpb.KVList{} 88 for count, uids := range counts { 89 sort.Slice(uids, func(i, j int) bool { return uids[i] < uids[j] }) 90 91 var pl pb.PostingList 92 pl.Pack = codec.Encode(uids, 256) 93 data, err := pl.Marshal() 94 x.Check(err) 95 list.Kv = append(list.Kv, &bpb.KV{ 96 Key: x.CountKey(pred, uint32(count), rev), 97 Value: data, 98 UserMeta: []byte{posting.BitCompletePosting}, 99 Version: c.state.writeTs, 100 StreamId: streamId, 101 }) 102 } 103 sort.Slice(list.Kv, func(i, j int) bool { 104 return bytes.Compare(list.Kv[i].Key, list.Kv[j].Key) < 0 105 }) 106 if err := c.writer.Write(list); err != nil { 107 x.Check(err) 108 } 109 } 110 111 func (c *countIndexer) wait() { 112 c.wg.Wait() 113 }