github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/schema/schema.go (about)

     1  /*
     2   * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package schema
    18  
    19  import (
    20  	"bytes"
    21  	"encoding/hex"
    22  	"fmt"
    23  	"sync"
    24  
    25  	"github.com/dgraph-io/badger"
    26  	"github.com/golang/glog"
    27  	"github.com/golang/protobuf/proto"
    28  	"golang.org/x/net/trace"
    29  
    30  	"github.com/dgraph-io/dgraph/protos/pb"
    31  	"github.com/dgraph-io/dgraph/tok"
    32  	"github.com/dgraph-io/dgraph/types"
    33  	"github.com/dgraph-io/dgraph/x"
    34  	"github.com/pkg/errors"
    35  )
    36  
    37  var (
    38  	pstate *state
    39  	pstore *badger.DB
    40  )
    41  
    42  func (s *state) init() {
    43  	s.predicate = make(map[string]*pb.SchemaUpdate)
    44  	s.types = make(map[string]*pb.TypeUpdate)
    45  	s.elog = trace.NewEventLog("Dgraph", "Schema")
    46  }
    47  
    48  type state struct {
    49  	sync.RWMutex
    50  	// Map containing predicate to type information.
    51  	predicate map[string]*pb.SchemaUpdate
    52  	types     map[string]*pb.TypeUpdate
    53  	elog      trace.EventLog
    54  }
    55  
    56  // State returns the struct holding the current schema.
    57  func State() *state {
    58  	return pstate
    59  }
    60  
    61  func (s *state) DeleteAll() {
    62  	s.Lock()
    63  	defer s.Unlock()
    64  
    65  	for pred := range s.predicate {
    66  		delete(s.predicate, pred)
    67  	}
    68  
    69  	for typ := range s.types {
    70  		delete(s.types, typ)
    71  	}
    72  }
    73  
    74  // Delete updates the schema in memory and disk
    75  func (s *state) Delete(attr string) error {
    76  	s.Lock()
    77  	defer s.Unlock()
    78  
    79  	glog.Infof("Deleting schema for predicate: [%s]", attr)
    80  	txn := pstore.NewTransactionAt(1, true)
    81  	if err := txn.Delete(x.SchemaKey(attr)); err != nil {
    82  		return err
    83  	}
    84  	// Delete is called rarely so sync write should be fine.
    85  	if err := txn.CommitAt(1, nil); err != nil {
    86  		return err
    87  	}
    88  
    89  	delete(s.predicate, attr)
    90  	return nil
    91  }
    92  
    93  // DeleteType updates the schema in memory and disk
    94  func (s *state) DeleteType(typeName string) error {
    95  	s.Lock()
    96  	defer s.Unlock()
    97  
    98  	glog.Infof("Deleting type definition for type: [%s]", typeName)
    99  	txn := pstore.NewTransactionAt(1, true)
   100  	if err := txn.Delete(x.TypeKey(typeName)); err != nil {
   101  		return err
   102  	}
   103  	// Delete is called rarely so sync write should be fine.
   104  	if err := txn.CommitAt(1, nil); err != nil {
   105  		return err
   106  	}
   107  
   108  	delete(s.types, typeName)
   109  	return nil
   110  }
   111  
   112  func logUpdate(schema pb.SchemaUpdate, pred string) string {
   113  	typ := types.TypeID(schema.ValueType).Name()
   114  	if schema.List {
   115  		typ = fmt.Sprintf("[%s]", typ)
   116  	}
   117  	return fmt.Sprintf("Setting schema for attr %s: %v, tokenizer: %v, directive: %v, count: %v\n",
   118  		pred, typ, schema.Tokenizer, schema.Directive, schema.Count)
   119  }
   120  
   121  func logTypeUpdate(typ pb.TypeUpdate, typeName string) string {
   122  	return fmt.Sprintf("Setting type definition for type %s: %v\n", typeName, typ)
   123  }
   124  
   125  // Set sets the schema for the given predicate in memory.
   126  // Schema mutations must flow through the update function, which are synced to the db.
   127  func (s *state) Set(pred string, schema pb.SchemaUpdate) {
   128  	s.Lock()
   129  	defer s.Unlock()
   130  	s.predicate[pred] = &schema
   131  	s.elog.Printf(logUpdate(schema, pred))
   132  }
   133  
   134  // SetType sets the type for the given predicate in memory.
   135  // schema mutations must flow through the update function, which are synced to the db.
   136  func (s *state) SetType(typeName string, typ pb.TypeUpdate) {
   137  	s.Lock()
   138  	defer s.Unlock()
   139  	s.types[typeName] = &typ
   140  	s.elog.Printf(logTypeUpdate(typ, typeName))
   141  }
   142  
   143  // Get gets the schema for the given predicate.
   144  func (s *state) Get(pred string) (pb.SchemaUpdate, bool) {
   145  	s.RLock()
   146  	defer s.RUnlock()
   147  	schema, has := s.predicate[pred]
   148  	if !has {
   149  		return pb.SchemaUpdate{}, false
   150  	}
   151  	return *schema, true
   152  }
   153  
   154  // GetType gets the type definition for the given type name.
   155  func (s *state) GetType(typeName string) (pb.TypeUpdate, bool) {
   156  	s.RLock()
   157  	defer s.RUnlock()
   158  	typ, has := s.types[typeName]
   159  	if !has {
   160  		return pb.TypeUpdate{}, false
   161  	}
   162  	return *typ, true
   163  }
   164  
   165  // TypeOf returns the schema type of predicate
   166  func (s *state) TypeOf(pred string) (types.TypeID, error) {
   167  	s.RLock()
   168  	defer s.RUnlock()
   169  	if schema, ok := s.predicate[pred]; ok {
   170  		return types.TypeID(schema.ValueType), nil
   171  	}
   172  	return types.UndefinedID, errors.Errorf("Schema not defined for predicate: %v.", pred)
   173  }
   174  
   175  // IsIndexed returns whether the predicate is indexed or not
   176  func (s *state) IsIndexed(pred string) bool {
   177  	s.RLock()
   178  	defer s.RUnlock()
   179  	if schema, ok := s.predicate[pred]; ok {
   180  		return len(schema.Tokenizer) > 0
   181  	}
   182  	return false
   183  }
   184  
   185  // IndexedFields returns the list of indexed fields
   186  func (s *state) IndexedFields() []string {
   187  	s.RLock()
   188  	defer s.RUnlock()
   189  	var out []string
   190  	for k, v := range s.predicate {
   191  		if len(v.Tokenizer) > 0 {
   192  			out = append(out, k)
   193  		}
   194  	}
   195  	return out
   196  }
   197  
   198  // Predicates returns the list of predicates for given group
   199  func (s *state) Predicates() []string {
   200  	s.RLock()
   201  	defer s.RUnlock()
   202  	var out []string
   203  	for k := range s.predicate {
   204  		out = append(out, k)
   205  	}
   206  	return out
   207  }
   208  
   209  // Types returns the list of types.
   210  func (s *state) Types() []string {
   211  	s.RLock()
   212  	defer s.RUnlock()
   213  	var out []string
   214  	for k := range s.types {
   215  		out = append(out, k)
   216  	}
   217  	return out
   218  }
   219  
   220  // Tokenizer returns the tokenizer for given predicate
   221  func (s *state) Tokenizer(pred string) []tok.Tokenizer {
   222  	s.RLock()
   223  	defer s.RUnlock()
   224  	schema, ok := s.predicate[pred]
   225  	x.AssertTruef(ok, "schema state not found for %s", pred)
   226  	var tokenizers []tok.Tokenizer
   227  	for _, it := range schema.Tokenizer {
   228  		t, found := tok.GetTokenizer(it)
   229  		x.AssertTruef(found, "Invalid tokenizer %s", it)
   230  		tokenizers = append(tokenizers, t)
   231  	}
   232  	return tokenizers
   233  }
   234  
   235  // TokenizerNames returns the tokenizer names for given predicate
   236  func (s *state) TokenizerNames(pred string) []string {
   237  	var names []string
   238  	tokenizers := s.Tokenizer(pred)
   239  	for _, t := range tokenizers {
   240  		names = append(names, t.Name())
   241  	}
   242  	return names
   243  }
   244  
   245  // HasTokenizer is a convenience func that checks if a given tokenizer is found in pred.
   246  // Returns true if found, else false.
   247  func (s *state) HasTokenizer(id byte, pred string) bool {
   248  	for _, t := range s.Tokenizer(pred) {
   249  		if t.Identifier() == id {
   250  			return true
   251  		}
   252  	}
   253  	return false
   254  }
   255  
   256  // IsReversed returns whether the predicate has reverse edge or not
   257  func (s *state) IsReversed(pred string) bool {
   258  	s.RLock()
   259  	defer s.RUnlock()
   260  	if schema, ok := s.predicate[pred]; ok {
   261  		return schema.Directive == pb.SchemaUpdate_REVERSE
   262  	}
   263  	return false
   264  }
   265  
   266  // HasCount returns whether we want to mantain a count index for the given predicate or not.
   267  func (s *state) HasCount(pred string) bool {
   268  	s.RLock()
   269  	defer s.RUnlock()
   270  	if schema, ok := s.predicate[pred]; ok {
   271  		return schema.Count
   272  	}
   273  	return false
   274  }
   275  
   276  // IsList returns whether the predicate is of list type.
   277  func (s *state) IsList(pred string) bool {
   278  	s.RLock()
   279  	defer s.RUnlock()
   280  	if schema, ok := s.predicate[pred]; ok {
   281  		return schema.List
   282  	}
   283  	return false
   284  }
   285  
   286  func (s *state) HasUpsert(pred string) bool {
   287  	s.RLock()
   288  	defer s.RUnlock()
   289  	if schema, ok := s.predicate[pred]; ok {
   290  		return schema.Upsert
   291  	}
   292  	return false
   293  }
   294  
   295  func (s *state) HasLang(pred string) bool {
   296  	s.RLock()
   297  	defer s.RUnlock()
   298  	if schema, ok := s.predicate[pred]; ok {
   299  		return schema.Lang
   300  	}
   301  	return false
   302  }
   303  
   304  // Init resets the schema state, setting the underlying DB to the given pointer.
   305  func Init(ps *badger.DB) {
   306  	pstore = ps
   307  	reset()
   308  }
   309  
   310  // Load reads the schema for the given predicate from the DB.
   311  func Load(predicate string) error {
   312  	if len(predicate) == 0 {
   313  		return errors.Errorf("Empty predicate")
   314  	}
   315  	key := x.SchemaKey(predicate)
   316  	txn := pstore.NewTransactionAt(1, false)
   317  	defer txn.Discard()
   318  	item, err := txn.Get(key)
   319  	if err == badger.ErrKeyNotFound {
   320  		return nil
   321  	}
   322  	if err != nil {
   323  		return err
   324  	}
   325  	var s pb.SchemaUpdate
   326  	err = item.Value(func(val []byte) error {
   327  		x.Check(s.Unmarshal(val))
   328  		return nil
   329  	})
   330  	if err != nil {
   331  		return err
   332  	}
   333  	State().Set(predicate, s)
   334  	State().elog.Printf(logUpdate(s, predicate))
   335  	glog.Infoln(logUpdate(s, predicate))
   336  	return nil
   337  }
   338  
   339  // LoadFromDb reads schema information from db and stores it in memory
   340  func LoadFromDb() error {
   341  	if err := LoadSchemaFromDb(); err != nil {
   342  		return err
   343  	}
   344  	return LoadTypesFromDb()
   345  }
   346  
   347  // LoadSchemaFromDb iterates through the DB and loads all the stored schema updates.
   348  func LoadSchemaFromDb() error {
   349  	prefix := x.SchemaPrefix()
   350  	txn := pstore.NewTransactionAt(1, false)
   351  	defer txn.Discard()
   352  	itr := txn.NewIterator(badger.DefaultIteratorOptions) // Need values, reversed=false.
   353  	defer itr.Close()
   354  
   355  	for itr.Seek(prefix); itr.Valid(); itr.Next() {
   356  		item := itr.Item()
   357  		key := item.Key()
   358  		if !bytes.HasPrefix(key, prefix) {
   359  			break
   360  		}
   361  		pk, err := x.Parse(key)
   362  		if err != nil {
   363  			glog.Errorf("Error while parsing key %s: %v", hex.Dump(key), err)
   364  			continue
   365  		}
   366  		attr := pk.Attr
   367  		var s pb.SchemaUpdate
   368  		err = item.Value(func(val []byte) error {
   369  			if len(val) == 0 {
   370  				s = pb.SchemaUpdate{Predicate: attr, ValueType: pb.Posting_DEFAULT}
   371  			}
   372  			x.Checkf(s.Unmarshal(val), "Error while loading schema from db")
   373  			State().Set(attr, s)
   374  			return nil
   375  		})
   376  		if err != nil {
   377  			return err
   378  		}
   379  	}
   380  	return nil
   381  }
   382  
   383  // LoadTypesFromDb iterates through the DB and loads all the stored type updates.
   384  func LoadTypesFromDb() error {
   385  	prefix := x.TypePrefix()
   386  	txn := pstore.NewTransactionAt(1, false)
   387  	defer txn.Discard()
   388  	itr := txn.NewIterator(badger.DefaultIteratorOptions) // Need values, reversed=false.
   389  	defer itr.Close()
   390  
   391  	for itr.Seek(prefix); itr.Valid(); itr.Next() {
   392  		item := itr.Item()
   393  		key := item.Key()
   394  		if !bytes.HasPrefix(key, prefix) {
   395  			break
   396  		}
   397  		pk, err := x.Parse(key)
   398  		if err != nil {
   399  			glog.Errorf("Error while parsing key %s: %v", hex.Dump(key), err)
   400  			continue
   401  		}
   402  		attr := pk.Attr
   403  		var t pb.TypeUpdate
   404  		err = item.Value(func(val []byte) error {
   405  			if len(val) == 0 {
   406  				t = pb.TypeUpdate{TypeName: attr}
   407  			}
   408  			x.Checkf(t.Unmarshal(val), "Error while loading types from db")
   409  			State().SetType(attr, t)
   410  			return nil
   411  		})
   412  		if err != nil {
   413  			return err
   414  		}
   415  	}
   416  	return nil
   417  }
   418  
   419  // InitialSchema returns the schema updates to insert at the beginning of
   420  // Dgraph's execution. It looks at the worker options to determine which
   421  // attributes to insert.
   422  func InitialSchema() []*pb.SchemaUpdate {
   423  	return initialSchemaInternal(false)
   424  }
   425  
   426  // CompleteInitialSchema returns all the schema updates regardless of the worker
   427  // options. This is useful in situations where the worker options are not known
   428  // in advance and it's better to create all the reserved predicates and remove
   429  // them later than miss some of them. An example of such situation is during bulk
   430  // loading.
   431  func CompleteInitialSchema() []*pb.SchemaUpdate {
   432  	return initialSchemaInternal(true)
   433  }
   434  
   435  func initialSchemaInternal(all bool) []*pb.SchemaUpdate {
   436  	var initialSchema []*pb.SchemaUpdate
   437  
   438  	initialSchema = append(initialSchema, &pb.SchemaUpdate{
   439  		Predicate: "dgraph.type",
   440  		ValueType: pb.Posting_STRING,
   441  		Directive: pb.SchemaUpdate_INDEX,
   442  		Tokenizer: []string{"exact"},
   443  		List:      true,
   444  	})
   445  
   446  	if all || x.WorkerConfig.AclEnabled {
   447  		// propose the schema update for acl predicates
   448  		initialSchema = append(initialSchema, []*pb.SchemaUpdate{
   449  			{
   450  				Predicate: "dgraph.xid",
   451  				ValueType: pb.Posting_STRING,
   452  				Directive: pb.SchemaUpdate_INDEX,
   453  				Upsert:    true,
   454  				Tokenizer: []string{"exact"},
   455  			},
   456  			{
   457  				Predicate: "dgraph.password",
   458  				ValueType: pb.Posting_PASSWORD,
   459  			},
   460  			{
   461  				Predicate: "dgraph.user.group",
   462  				Directive: pb.SchemaUpdate_REVERSE,
   463  				ValueType: pb.Posting_UID,
   464  				List:      true,
   465  			},
   466  			{
   467  				Predicate: "dgraph.group.acl",
   468  				ValueType: pb.Posting_STRING,
   469  			}}...)
   470  	}
   471  
   472  	return initialSchema
   473  }
   474  
   475  // IsReservedPredicateChanged returns true if the initial update for the reserved
   476  // predicate pred is different than the passed update.
   477  func IsReservedPredicateChanged(pred string, update *pb.SchemaUpdate) bool {
   478  	// Return false for non-reserved predicates.
   479  	if !x.IsReservedPredicate(pred) {
   480  		return false
   481  	}
   482  
   483  	initialSchema := CompleteInitialSchema()
   484  	for _, original := range initialSchema {
   485  		if original.Predicate != pred {
   486  			continue
   487  		}
   488  		return !proto.Equal(original, update)
   489  	}
   490  	return true
   491  }
   492  
   493  func reset() {
   494  	pstate = new(state)
   495  	pstate.init()
   496  }