github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/bulk/schema.go (about)

     1  /*
     2   * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package bulk
    18  
    19  import (
    20  	"fmt"
    21  	"log"
    22  	"math"
    23  	"sync"
    24  
    25  	"github.com/dgraph-io/badger"
    26  	"github.com/dgraph-io/dgraph/posting"
    27  	"github.com/dgraph-io/dgraph/protos/pb"
    28  	"github.com/dgraph-io/dgraph/schema"
    29  	wk "github.com/dgraph-io/dgraph/worker"
    30  	"github.com/dgraph-io/dgraph/x"
    31  )
    32  
    33  type schemaStore struct {
    34  	sync.RWMutex
    35  	schemaMap map[string]*pb.SchemaUpdate
    36  	types     []*pb.TypeUpdate
    37  	*state
    38  }
    39  
    40  func newSchemaStore(initial *schema.ParsedSchema, opt options, state *state) *schemaStore {
    41  	s := &schemaStore{
    42  		schemaMap: map[string]*pb.SchemaUpdate{},
    43  		state:     state,
    44  	}
    45  
    46  	// Load all initial predicates. Some predicates that might not be used when
    47  	// the alpha is started (e.g ACL predicates) might be included but it's
    48  	// better to include them in case the input data contains triples with these
    49  	// predicates.
    50  	for _, update := range schema.CompleteInitialSchema() {
    51  		s.schemaMap[update.Predicate] = update
    52  	}
    53  
    54  	if opt.StoreXids {
    55  		s.schemaMap["xid"] = &pb.SchemaUpdate{
    56  			ValueType: pb.Posting_STRING,
    57  			Tokenizer: []string{"hash"},
    58  		}
    59  	}
    60  
    61  	for _, sch := range initial.Preds {
    62  		p := sch.Predicate
    63  		sch.Predicate = "" // Predicate is stored in the (badger) key, so not needed in the value.
    64  		if _, ok := s.schemaMap[p]; ok {
    65  			fmt.Printf("Predicate %q already exists in schema\n", p)
    66  			continue
    67  		}
    68  		s.schemaMap[p] = sch
    69  	}
    70  
    71  	s.types = initial.Types
    72  
    73  	return s
    74  }
    75  
    76  func (s *schemaStore) getSchema(pred string) *pb.SchemaUpdate {
    77  	s.RLock()
    78  	defer s.RUnlock()
    79  	return s.schemaMap[pred]
    80  }
    81  
    82  func (s *schemaStore) setSchemaAsList(pred string) {
    83  	s.Lock()
    84  	defer s.Unlock()
    85  	schema, ok := s.schemaMap[pred]
    86  	if !ok {
    87  		return
    88  	}
    89  	schema.List = true
    90  }
    91  
    92  func (s *schemaStore) validateType(de *pb.DirectedEdge, objectIsUID bool) {
    93  	if objectIsUID {
    94  		de.ValueType = pb.Posting_UID
    95  	}
    96  
    97  	s.RLock()
    98  	sch, ok := s.schemaMap[de.Attr]
    99  	s.RUnlock()
   100  	if !ok {
   101  		s.Lock()
   102  		sch, ok = s.schemaMap[de.Attr]
   103  		if !ok {
   104  			sch = &pb.SchemaUpdate{ValueType: de.ValueType}
   105  			if objectIsUID {
   106  				sch.List = true
   107  			}
   108  			s.schemaMap[de.Attr] = sch
   109  		}
   110  		s.Unlock()
   111  	}
   112  
   113  	err := wk.ValidateAndConvert(de, sch)
   114  	if err != nil {
   115  		log.Fatalf("RDF doesn't match schema: %v", err)
   116  	}
   117  }
   118  
   119  func (s *schemaStore) getPredicates(db *badger.DB) []string {
   120  	txn := db.NewTransactionAt(math.MaxUint64, false)
   121  	defer txn.Discard()
   122  
   123  	opts := badger.DefaultIteratorOptions
   124  	opts.PrefetchValues = false
   125  	itr := txn.NewIterator(opts)
   126  	defer itr.Close()
   127  
   128  	m := make(map[string]struct{})
   129  	for itr.Rewind(); itr.Valid(); {
   130  		item := itr.Item()
   131  		pk, err := x.Parse(item.Key())
   132  		x.Check(err)
   133  		m[pk.Attr] = struct{}{}
   134  		itr.Seek(pk.SkipPredicate())
   135  		continue
   136  	}
   137  
   138  	var preds []string
   139  	for pred := range m {
   140  		preds = append(preds, pred)
   141  	}
   142  	return preds
   143  }
   144  
   145  func (s *schemaStore) write(db *badger.DB, preds []string) {
   146  	txn := db.NewTransactionAt(math.MaxUint64, true)
   147  	defer txn.Discard()
   148  	for _, pred := range preds {
   149  		sch, ok := s.schemaMap[pred]
   150  		if !ok {
   151  			continue
   152  		}
   153  		k := x.SchemaKey(pred)
   154  		v, err := sch.Marshal()
   155  		x.Check(err)
   156  		x.Check(txn.SetEntry(&badger.Entry{
   157  			Key:      k,
   158  			Value:    v,
   159  			UserMeta: posting.BitSchemaPosting}))
   160  	}
   161  
   162  	// Write all the types as all groups should have access to all the types.
   163  	for _, typ := range s.types {
   164  		k := x.TypeKey(typ.TypeName)
   165  		v, err := typ.Marshal()
   166  		x.Check(err)
   167  		x.Check(txn.SetEntry(&badger.Entry{
   168  			Key:      k,
   169  			Value:    v,
   170  			UserMeta: posting.BitSchemaPosting,
   171  		}))
   172  	}
   173  
   174  	// Write schema always at timestamp 1, s.state.writeTs may not be equal to 1
   175  	// if bulk loader was restarted or other similar scenarios.
   176  	x.Check(txn.CommitAt(1, nil))
   177  }