github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/x/keys.go (about)

     1  /*
     2   * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package x
    18  
    19  import (
    20  	"encoding/binary"
    21  	"math"
    22  	"strings"
    23  
    24  	"github.com/pkg/errors"
    25  
    26  	"github.com/dgraph-io/dgraph/protos/pb"
    27  )
    28  
    29  const (
    30  	// TODO(pawan) - Make this 2 bytes long. Right now ParsedKey has byteType and
    31  	// bytePrefix. Change it so that it just has one field which has all the information.
    32  
    33  	// ByteData indicates the key stores data.
    34  	ByteData = byte(0x00)
    35  	// ByteIndex indicates the key stores an index.
    36  	ByteIndex = byte(0x02)
    37  	// ByteReverse indicates the key stores a reverse index.
    38  	ByteReverse = byte(0x04)
    39  	// ByteCount indicates the key stores a count index.
    40  	ByteCount = byte(0x08)
    41  	// ByteCountRev indicates the key stores a reverse count index.
    42  	ByteCountRev = ByteCount | ByteReverse
    43  	// DefaultPrefix is the prefix used for data, index and reverse keys so that relative
    44  	// order of data doesn't change keys of same attributes are located together.
    45  	DefaultPrefix = byte(0x00)
    46  	byteSchema    = byte(0x01)
    47  	byteType      = byte(0x02)
    48  	// ByteSplit is a constant to specify a given key corresponds to a posting list split
    49  	// into multiple parts.
    50  	ByteSplit = byte(0x01)
    51  	// ByteUnused is a constant to specify keys which need to be discarded.
    52  	ByteUnused = byte(0xff)
    53  )
    54  
    55  func writeAttr(buf []byte, attr string) []byte {
    56  	AssertTrue(len(attr) < math.MaxUint16)
    57  	binary.BigEndian.PutUint16(buf[:2], uint16(len(attr)))
    58  
    59  	rest := buf[2:]
    60  	AssertTrue(len(attr) == copy(rest, attr))
    61  
    62  	return rest[len(attr):]
    63  }
    64  
    65  // genKey creates the key and writes the initial bytes (type byte, length of attribute,
    66  // and the attribute itself). It leaves the rest of the key empty for further processing
    67  // if necessary.
    68  func generateKey(typeByte byte, attr string, totalLen int) []byte {
    69  	AssertTrue(totalLen >= 1+2+len(attr))
    70  
    71  	buf := make([]byte, totalLen)
    72  	buf[0] = typeByte
    73  	rest := buf[1:]
    74  
    75  	writeAttr(rest, attr)
    76  	return buf
    77  }
    78  
    79  // SchemaKey returns schema key for given attribute. Schema keys are stored
    80  // separately with unique prefix, since we need to iterate over all schema keys.
    81  // The structure of a schema key is as follows:
    82  //
    83  // byte 0: key type prefix (set to byteSchema)
    84  // byte 1-2: length of attr
    85  // next len(attr) bytes: value of attr
    86  func SchemaKey(attr string) []byte {
    87  	return generateKey(byteSchema, attr, 1+2+len(attr))
    88  }
    89  
    90  // TypeKey returns type key for given type name. Type keys are stored separately
    91  // with a unique prefix, since we need to iterate over all type keys.
    92  // The structure of a type key is as follows:
    93  //
    94  // byte 0: key type prefix (set to byteType)
    95  // byte 1-2: length of typeName
    96  // next len(attr) bytes: value of attr (the type name)
    97  func TypeKey(attr string) []byte {
    98  	return generateKey(byteType, attr, 1+2+len(attr))
    99  }
   100  
   101  // DataKey generates a data key with the given attribute and UID.
   102  // The structure of a data key is as follows:
   103  //
   104  // byte 0: key type prefix (set to DefaultPrefix)
   105  // byte 1-2: length of attr
   106  // next len(attr) bytes: value of attr
   107  // next byte: data type prefix (set to ByteData)
   108  // next byte: byte to determine if this key corresponds to a list that has been split
   109  //   into multiple parts
   110  // next eight bytes: value of uid
   111  // next eight bytes (optional): if the key corresponds to a split list, the startUid of
   112  //   the split stored in this key.
   113  func DataKey(attr string, uid uint64) []byte {
   114  	prefixLen := 1 + 2 + len(attr)
   115  	totalLen := prefixLen + 1 + 1 + 8
   116  	buf := generateKey(DefaultPrefix, attr, totalLen)
   117  
   118  	rest := buf[prefixLen:]
   119  	rest[0] = ByteData
   120  
   121  	// By default, this key does not correspond to a part of a split key.
   122  	rest = rest[1:]
   123  	rest[0] = 0
   124  
   125  	rest = rest[1:]
   126  	binary.BigEndian.PutUint64(rest, uid)
   127  	return buf
   128  }
   129  
   130  // ReverseKey generates a reverse key with the given attribute and UID.
   131  // The structure of a reverse key is as follows:
   132  //
   133  // byte 0: key type prefix (set to DefaultPrefix)
   134  // byte 1-2: length of attr
   135  // next len(attr) bytes: value of attr
   136  // next byte: data type prefix (set to ByteReverse)
   137  // next byte: byte to determine if this key corresponds to a list that has been split
   138  //   into multiple parts
   139  // next eight bytes: value of uid
   140  // next eight bytes (optional): if the key corresponds to a split list, the startUid of
   141  //   the split stored in this key.
   142  func ReverseKey(attr string, uid uint64) []byte {
   143  	prefixLen := 1 + 2 + len(attr)
   144  	totalLen := prefixLen + 1 + 1 + 8
   145  	buf := generateKey(DefaultPrefix, attr, totalLen)
   146  
   147  	rest := buf[prefixLen:]
   148  	rest[0] = ByteReverse
   149  
   150  	// By default, this key does not correspond to a part of a split key.
   151  	rest = rest[1:]
   152  	rest[0] = 0
   153  
   154  	rest = rest[1:]
   155  	binary.BigEndian.PutUint64(rest, uid)
   156  	return buf
   157  }
   158  
   159  // IndexKey generates a index key with the given attribute and term.
   160  // The structure of an index key is as follows:
   161  //
   162  // byte 0: key type prefix (set to DefaultPrefix)
   163  // byte 1-2: length of attr
   164  // next len(attr) bytes: value of attr
   165  // next byte: data type prefix (set to ByteIndex)
   166  // next byte: byte to determine if this key corresponds to a list that has been split
   167  //   into multiple parts
   168  // next len(term) bytes: value of term
   169  // next eight bytes (optional): if the key corresponds to a split list, the startUid of
   170  //   the split stored in this key.
   171  func IndexKey(attr, term string) []byte {
   172  	prefixLen := 1 + 2 + len(attr)
   173  	totalLen := prefixLen + 1 + 1 + len(term)
   174  	buf := generateKey(DefaultPrefix, attr, totalLen)
   175  
   176  	rest := buf[prefixLen:]
   177  	rest[0] = ByteIndex
   178  
   179  	// By default, this key does not correspond to a part of a split key.
   180  	rest = rest[1:]
   181  	rest[0] = 0
   182  
   183  	rest = rest[1:]
   184  	AssertTrue(len(term) == copy(rest, term))
   185  	return buf
   186  }
   187  
   188  // CountKey generates a count key with the given attribute and uid.
   189  // The structure of a count key is as follows:
   190  //
   191  // byte 0: key type prefix (set to DefaultPrefix)
   192  // byte 1-2: length of attr
   193  // next len(attr) bytes: value of attr
   194  // next byte: data type prefix (set to ByteCount or ByteCountRev)
   195  // next byte: byte to determine if this key corresponds to a list that has been split
   196  //   into multiple parts. Since count indexes only store one number, this value will
   197  //   always be zero.
   198  // next four bytes: value of count.
   199  func CountKey(attr string, count uint32, reverse bool) []byte {
   200  	prefixLen := 1 + 2 + len(attr)
   201  	totalLen := prefixLen + 1 + 1 + 4
   202  	buf := generateKey(DefaultPrefix, attr, totalLen)
   203  
   204  	rest := buf[prefixLen:]
   205  	if reverse {
   206  		rest[0] = ByteCountRev
   207  	} else {
   208  		rest[0] = ByteCount
   209  	}
   210  
   211  	// By default, this key does not correspond to a part of a split key.
   212  	rest = rest[1:]
   213  	rest[0] = 0
   214  
   215  	rest = rest[1:]
   216  	binary.BigEndian.PutUint32(rest, count)
   217  	return buf
   218  }
   219  
// ParsedKey represents a key that has been parsed into its multiple attributes.
// Which fields are populated depends on the kind of key; see Parse.
type ParsedKey struct {
	// byteType is the data type byte that follows the attribute in keys that do not
	// carry the schema or type prefix (ByteData, ByteIndex, ByteReverse, ByteCount
	// or ByteCountRev).
	byteType byte
	// Attr is the attribute (or type name) stored in the key.
	Attr string
	// Uid is set by Parse for data and reverse keys.
	Uid uint64
	// HasStartUid is true when the split byte of the key equals ByteSplit.
	HasStartUid bool
	// StartUid is the trailing eight-byte value read from keys marked as split.
	StartUid uint64
	// Term is set by Parse for index keys.
	Term string
	// Count is set by Parse for count and reverse count keys.
	Count uint32
	// bytePrefix is the first byte of the key (DefaultPrefix, byteSchema, byteType
	// or ByteUnused).
	bytePrefix byte
}
   231  
   232  // IsData returns whether the key is a data key.
   233  func (p ParsedKey) IsData() bool {
   234  	return p.bytePrefix == DefaultPrefix && p.byteType == ByteData
   235  }
   236  
   237  // IsReverse returns whether the key is a reverse key.
   238  func (p ParsedKey) IsReverse() bool {
   239  	return p.bytePrefix == DefaultPrefix && p.byteType == ByteReverse
   240  }
   241  
   242  // IsCountOrCountRev returns whether the key is a count or a count rev key.
   243  func (p ParsedKey) IsCountOrCountRev() bool {
   244  	return p.IsCount() || p.IsCountRev()
   245  }
   246  
   247  // IsCount returns whether the key is a count key.
   248  func (p ParsedKey) IsCount() bool {
   249  	return p.bytePrefix == DefaultPrefix && p.byteType == ByteCount
   250  }
   251  
   252  // IsCountRev returns whether the key is a count rev key.
   253  func (p ParsedKey) IsCountRev() bool {
   254  	return p.bytePrefix == DefaultPrefix && p.byteType == ByteCountRev
   255  }
   256  
   257  // IsIndex returns whether the key is an index key.
   258  func (p ParsedKey) IsIndex() bool {
   259  	return p.bytePrefix == DefaultPrefix && p.byteType == ByteIndex
   260  }
   261  
   262  // IsSchema returns whether the key is a schema key.
   263  func (p ParsedKey) IsSchema() bool {
   264  	return p.bytePrefix == byteSchema
   265  }
   266  
   267  // IsType returns whether the key is a type key.
   268  func (p ParsedKey) IsType() bool {
   269  	return p.bytePrefix == byteType
   270  }
   271  
   272  // IsOfType checks whether the key is of the given type.
   273  func (p ParsedKey) IsOfType(typ byte) bool {
   274  	switch typ {
   275  	case ByteCount, ByteCountRev:
   276  		return p.IsCountOrCountRev()
   277  	case ByteReverse:
   278  		return p.IsReverse()
   279  	case ByteIndex:
   280  		return p.IsIndex()
   281  	case ByteData:
   282  		return p.IsData()
   283  	default:
   284  	}
   285  	return false
   286  }
   287  
   288  // SkipPredicate returns the first key after the keys corresponding to the predicate
   289  // of this key. Useful when iterating in the reverse order.
   290  func (p ParsedKey) SkipPredicate() []byte {
   291  	buf := make([]byte, 1+2+len(p.Attr)+1)
   292  	buf[0] = p.bytePrefix
   293  	rest := buf[1:]
   294  	k := writeAttr(rest, p.Attr)
   295  	AssertTrue(len(k) == 1)
   296  	k[0] = 0xFF
   297  	return buf
   298  }
   299  
   300  // SkipSchema returns the first key after all the schema keys.
   301  func (p ParsedKey) SkipSchema() []byte {
   302  	var buf [1]byte
   303  	buf[0] = byteSchema + 1
   304  	return buf[:]
   305  }
   306  
   307  // SkipType returns the first key after all the type keys.
   308  func (p ParsedKey) SkipType() []byte {
   309  	var buf [1]byte
   310  	buf[0] = byteType + 1
   311  	return buf[:]
   312  }
   313  
   314  // DataPrefix returns the prefix for data keys.
   315  func (p ParsedKey) DataPrefix() []byte {
   316  	buf := make([]byte, 1+2+len(p.Attr)+1+1)
   317  	buf[0] = p.bytePrefix
   318  	rest := buf[1:]
   319  	k := writeAttr(rest, p.Attr)
   320  	AssertTrue(len(k) == 2)
   321  	k[0] = ByteData
   322  	k[1] = 0
   323  	return buf
   324  }
   325  
   326  // IndexPrefix returns the prefix for index keys.
   327  func (p ParsedKey) IndexPrefix() []byte {
   328  	buf := make([]byte, 1+2+len(p.Attr)+1+1)
   329  	buf[0] = p.bytePrefix
   330  	rest := buf[1:]
   331  	k := writeAttr(rest, p.Attr)
   332  	AssertTrue(len(k) == 2)
   333  	k[0] = ByteIndex
   334  	k[1] = 0
   335  	return buf
   336  }
   337  
   338  // ReversePrefix returns the prefix for index keys.
   339  func (p ParsedKey) ReversePrefix() []byte {
   340  	buf := make([]byte, 1+2+len(p.Attr)+1+1)
   341  	buf[0] = p.bytePrefix
   342  	rest := buf[1:]
   343  	k := writeAttr(rest, p.Attr)
   344  	AssertTrue(len(k) == 2)
   345  	k[0] = ByteReverse
   346  	k[1] = 0
   347  	return buf
   348  }
   349  
   350  // CountPrefix returns the prefix for count keys.
   351  func (p ParsedKey) CountPrefix(reverse bool) []byte {
   352  	buf := make([]byte, 1+2+len(p.Attr)+1+1)
   353  	buf[0] = p.bytePrefix
   354  	rest := buf[1:]
   355  	k := writeAttr(rest, p.Attr)
   356  	AssertTrue(len(k) == 2)
   357  	if reverse {
   358  		k[0] = ByteCountRev
   359  	} else {
   360  		k[0] = ByteCount
   361  	}
   362  	k[1] = 0
   363  	return buf
   364  }
   365  
   366  // ToBackupKey returns the key in the format used for writing backups.
   367  func (p ParsedKey) ToBackupKey() *pb.BackupKey {
   368  	key := pb.BackupKey{}
   369  	key.Attr = p.Attr
   370  	key.Uid = p.Uid
   371  	key.StartUid = p.StartUid
   372  	key.Term = p.Term
   373  	key.Count = p.Count
   374  
   375  	switch {
   376  	case p.IsData():
   377  		key.Type = pb.BackupKey_DATA
   378  	case p.IsIndex():
   379  		key.Type = pb.BackupKey_INDEX
   380  	case p.IsReverse():
   381  		key.Type = pb.BackupKey_REVERSE
   382  	case p.IsCount():
   383  		key.Type = pb.BackupKey_COUNT
   384  	case p.IsCountRev():
   385  		key.Type = pb.BackupKey_COUNT_REV
   386  	case p.IsSchema():
   387  		key.Type = pb.BackupKey_SCHEMA
   388  	case p.IsType():
   389  		key.Type = pb.BackupKey_TYPE
   390  	}
   391  	return &key
   392  }
   393  
   394  // FromBackupKey takes a key in the format used for backups and converts it to a key.
   395  func FromBackupKey(backupKey *pb.BackupKey) []byte {
   396  	if backupKey == nil {
   397  		return nil
   398  	}
   399  
   400  	var key []byte
   401  	switch backupKey.Type {
   402  	case pb.BackupKey_DATA:
   403  		key = DataKey(backupKey.Attr, backupKey.Uid)
   404  	case pb.BackupKey_INDEX:
   405  		key = IndexKey(backupKey.Attr, backupKey.Term)
   406  	case pb.BackupKey_REVERSE:
   407  		key = ReverseKey(backupKey.Attr, backupKey.Uid)
   408  	case pb.BackupKey_COUNT:
   409  		key = CountKey(backupKey.Attr, backupKey.Count, false)
   410  	case pb.BackupKey_COUNT_REV:
   411  		key = CountKey(backupKey.Attr, backupKey.Count, true)
   412  	case pb.BackupKey_SCHEMA:
   413  		key = SchemaKey(backupKey.Attr)
   414  	case pb.BackupKey_TYPE:
   415  		key = TypeKey(backupKey.Attr)
   416  	}
   417  
   418  	if backupKey.StartUid > 0 {
   419  		var err error
   420  		key, err = GetSplitKey(key, backupKey.StartUid)
   421  		Check(err)
   422  	}
   423  	return key
   424  }
   425  
   426  // SchemaPrefix returns the prefix for Schema keys.
   427  func SchemaPrefix() []byte {
   428  	var buf [1]byte
   429  	buf[0] = byteSchema
   430  	return buf[:]
   431  }
   432  
   433  // TypePrefix returns the prefix for Schema keys.
   434  func TypePrefix() []byte {
   435  	var buf [1]byte
   436  	buf[0] = byteType
   437  	return buf[:]
   438  }
   439  
   440  // PredicatePrefix returns the prefix for all keys belonging to this predicate except schema key.
   441  func PredicatePrefix(predicate string) []byte {
   442  	buf := make([]byte, 1+2+len(predicate))
   443  	buf[0] = DefaultPrefix
   444  	k := writeAttr(buf[1:], predicate)
   445  	AssertTrue(len(k) == 0)
   446  	return buf
   447  }
   448  
   449  // GetSplitKey takes a key baseKey and generates the key of the list split that starts at startUid.
   450  func GetSplitKey(baseKey []byte, startUid uint64) ([]byte, error) {
   451  	keyCopy := make([]byte, len(baseKey)+8)
   452  	copy(keyCopy, baseKey)
   453  
   454  	p, err := Parse(baseKey)
   455  	if err != nil {
   456  		return nil, err
   457  	}
   458  
   459  	index := 1 + 2 + len(p.Attr) + 1
   460  	if index >= len(keyCopy) {
   461  		panic("Cannot write to key. Key is too small")
   462  	}
   463  	keyCopy[index] = ByteSplit
   464  	binary.BigEndian.PutUint64(keyCopy[len(baseKey):], startUid)
   465  
   466  	return keyCopy, nil
   467  }
   468  
   469  // Parse would parse the key. ParsedKey does not reuse the key slice, so the key slice can change
   470  // without affecting the contents of ParsedKey.
   471  func Parse(key []byte) (ParsedKey, error) {
   472  	var p ParsedKey
   473  
   474  	p.bytePrefix = key[0]
   475  	if p.bytePrefix == ByteUnused {
   476  		return p, nil
   477  	}
   478  
   479  	sz := int(binary.BigEndian.Uint16(key[1:3]))
   480  	k := key[3:]
   481  
   482  	p.Attr = string(k[:sz])
   483  	k = k[sz:]
   484  
   485  	switch p.bytePrefix {
   486  	case byteSchema, byteType:
   487  		return p, nil
   488  	default:
   489  	}
   490  
   491  	p.byteType = k[0]
   492  	k = k[1:]
   493  
   494  	p.HasStartUid = k[0] == ByteSplit
   495  	k = k[1:]
   496  
   497  	switch p.byteType {
   498  	case ByteData, ByteReverse:
   499  		if len(k) < 8 {
   500  			return p, errors.Errorf("uid length < 8 for key: %q, parsed key: %+v", key, p)
   501  		}
   502  		p.Uid = binary.BigEndian.Uint64(k)
   503  
   504  		if !p.HasStartUid {
   505  			break
   506  		}
   507  
   508  		if len(k) < 16 {
   509  			return p, errors.Errorf("StartUid length < 8 for key: %q, parsed key: %+v", key, p)
   510  		}
   511  
   512  		k = k[8:]
   513  		p.StartUid = binary.BigEndian.Uint64(k)
   514  	case ByteIndex:
   515  		if !p.HasStartUid {
   516  			p.Term = string(k)
   517  			break
   518  		}
   519  
   520  		if len(k) < 8 {
   521  			return p, errors.Errorf("StartUid length < 8 for key: %q, parsed key: %+v", key, p)
   522  		}
   523  
   524  		term := k[:len(k)-8]
   525  		startUid := k[len(k)-8:]
   526  		p.Term = string(term)
   527  		p.StartUid = binary.BigEndian.Uint64(startUid)
   528  	case ByteCount, ByteCountRev:
   529  		if len(k) < 4 {
   530  			return p, errors.Errorf("count length < 4 for key: %q, parsed key: %+v", key, p)
   531  		}
   532  		p.Count = binary.BigEndian.Uint32(k)
   533  
   534  		if !p.HasStartUid {
   535  			break
   536  		}
   537  
   538  		if len(k) < 12 {
   539  			return p, errors.Errorf("StartUid length < 8 for key: %q, parsed key: %+v", key, p)
   540  		}
   541  
   542  		k = k[4:]
   543  		p.StartUid = binary.BigEndian.Uint64(k)
   544  	default:
   545  		// Some other data type.
   546  		return p, errors.Errorf("Invalid data type")
   547  	}
   548  	return p, nil
   549  }
   550  
   551  var reservedPredicateMap = map[string]struct{}{
   552  	"dgraph.type": {},
   553  }
   554  
   555  var aclPredicateMap = map[string]struct{}{
   556  	"dgraph.xid":        {},
   557  	"dgraph.password":   {},
   558  	"dgraph.user.group": {},
   559  	"dgraph.group.acl":  {},
   560  }
   561  
   562  // IsReservedPredicate returns true if the predicate is in the reserved predicate list.
   563  func IsReservedPredicate(pred string) bool {
   564  	_, ok := reservedPredicateMap[strings.ToLower(pred)]
   565  	return ok || IsAclPredicate(pred)
   566  }
   567  
   568  // IsAclPredicate returns true if the predicate is in the list of reserved
   569  // predicates for the ACL feature.
   570  func IsAclPredicate(pred string) bool {
   571  	_, ok := aclPredicateMap[strings.ToLower(pred)]
   572  	return ok
   573  }
   574  
   575  // ReservedPredicates returns the complete list of reserved predicates.
   576  func ReservedPredicates() []string {
   577  	var preds []string
   578  	for pred := range reservedPredicateMap {
   579  		preds = append(preds, pred)
   580  	}
   581  	for pred := range aclPredicateMap {
   582  		preds = append(preds, pred)
   583  	}
   584  	return preds
   585  }