github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/bulk/count_index.go (about)

     1  /*
     2   * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package bulk
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"sort"
    24  	"sync"
    25  	"sync/atomic"
    26  
    27  	"github.com/dgraph-io/badger"
    28  	bpb "github.com/dgraph-io/badger/pb"
    29  	"github.com/dgraph-io/dgraph/codec"
    30  	"github.com/dgraph-io/dgraph/posting"
    31  	"github.com/dgraph-io/dgraph/protos/pb"
    32  	"github.com/dgraph-io/dgraph/x"
    33  )
    34  
    35  type current struct {
    36  	pred  string
    37  	rev   bool
    38  	track bool
    39  }
    40  
// countIndexer accumulates, per predicate and direction, the uids seen for
// each count value and writes them out as count-index entries.
type countIndexer struct {
	// reducer is embedded; the methods below reach schema, streamId and
	// state through it (presumably fields of reducer or of something it
	// embeds — confirm against its definition).
	*reducer
	// writer receives the KV lists produced by writeIndex.
	writer *badger.StreamWriter
	// cur identifies the predicate/direction whose counts are held in counts.
	cur current
	// counts maps a count value to the uids that were added with that count.
	counts map[int][]uint64
	// wg tracks the writeIndex goroutines that are still running.
	wg sync.WaitGroup
}
    48  
    49  // addUid adds the uid from rawKey to a count index if a count index is
    50  // required by the schema. This method expects keys to be passed into it in
    51  // sorted order.
    52  func (c *countIndexer) addUid(rawKey []byte, count int) {
    53  	key, err := x.Parse(rawKey)
    54  	if err != nil {
    55  		fmt.Printf("Error while parsing key %s: %v\n", hex.Dump(rawKey), err)
    56  		return
    57  	}
    58  	if !key.IsData() && !key.IsReverse() {
    59  		return
    60  	}
    61  	sameIndexKey := key.Attr == c.cur.pred && key.IsReverse() == c.cur.rev
    62  	if sameIndexKey && !c.cur.track {
    63  		return
    64  	}
    65  
    66  	if !sameIndexKey {
    67  		if len(c.counts) > 0 {
    68  			c.wg.Add(1)
    69  			go c.writeIndex(c.cur.pred, c.cur.rev, c.counts)
    70  		}
    71  		if len(c.counts) > 0 || c.counts == nil {
    72  			c.counts = make(map[int][]uint64)
    73  		}
    74  		c.cur.pred = key.Attr
    75  		c.cur.rev = key.IsReverse()
    76  		c.cur.track = c.schema.getSchema(key.Attr).GetCount()
    77  	}
    78  	if c.cur.track {
    79  		c.counts[count] = append(c.counts[count], key.Uid)
    80  	}
    81  }
    82  
    83  func (c *countIndexer) writeIndex(pred string, rev bool, counts map[int][]uint64) {
    84  	defer c.wg.Done()
    85  
    86  	streamId := atomic.AddUint32(&c.streamId, 1)
    87  	list := &bpb.KVList{}
    88  	for count, uids := range counts {
    89  		sort.Slice(uids, func(i, j int) bool { return uids[i] < uids[j] })
    90  
    91  		var pl pb.PostingList
    92  		pl.Pack = codec.Encode(uids, 256)
    93  		data, err := pl.Marshal()
    94  		x.Check(err)
    95  		list.Kv = append(list.Kv, &bpb.KV{
    96  			Key:      x.CountKey(pred, uint32(count), rev),
    97  			Value:    data,
    98  			UserMeta: []byte{posting.BitCompletePosting},
    99  			Version:  c.state.writeTs,
   100  			StreamId: streamId,
   101  		})
   102  	}
   103  	sort.Slice(list.Kv, func(i, j int) bool {
   104  		return bytes.Compare(list.Kv[i].Key, list.Kv[j].Key) < 0
   105  	})
   106  	if err := c.writer.Write(list); err != nil {
   107  		x.Check(err)
   108  	}
   109  }
   110  
   111  func (c *countIndexer) wait() {
   112  	c.wg.Wait()
   113  }