github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/schema.go (about)

     1  /*
     2   * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package bulk
    18  
    19  import (
    20  	"fmt"
    21  	"log"
    22  	"math"
    23  	"sync"
    24  
    25  	"github.com/dgraph-io/badger/v3"
    26  	"github.com/dgraph-io/dgraph/posting"
    27  	"github.com/dgraph-io/dgraph/protos/pb"
    28  	"github.com/dgraph-io/dgraph/schema"
    29  	wk "github.com/dgraph-io/dgraph/worker"
    30  	"github.com/dgraph-io/dgraph/x"
    31  )
    32  
// schemaStore holds the predicate schema and type definitions accumulated
// by the bulk loader, together with the loader state it was created with.
type schemaStore struct {
	// The embedded RWMutex is taken by getSchema, setSchemaAsList,
	// validateType and checkAndSetInitialSchema around their accesses to
	// schemaMap (and types). write reads both without locking.
	sync.RWMutex
	// schemaMap maps a predicate name to its schema entry.
	schemaMap map[string]*pb.SchemaUpdate
	// types lists the type definitions that write persists alongside the
	// predicate schema.
	types []*pb.TypeUpdate
	// The embedded state supplies the loader options (opt) and the record of
	// already-initialised namespaces (namespaces) used by the methods here.
	*state
}
    39  
    40  func newSchemaStore(initial *schema.ParsedSchema, opt *options, state *state) *schemaStore {
    41  	if opt == nil {
    42  		log.Fatalf("Cannot create schema store with nil options.")
    43  	}
    44  
    45  	s := &schemaStore{
    46  		schemaMap: map[string]*pb.SchemaUpdate{},
    47  		state:     state,
    48  	}
    49  
    50  	// Initialize only for the default namespace. Initialization for other namespaces will be done
    51  	// whenever we see data for a new namespace.
    52  	s.checkAndSetInitialSchema(x.GalaxyNamespace)
    53  
    54  	s.types = initial.Types
    55  	// This is from the schema read from the schema file.
    56  	for _, sch := range initial.Preds {
    57  		p := sch.Predicate
    58  		sch.Predicate = "" // Predicate is stored in the (badger) key, so not needed in the value.
    59  		if _, ok := s.schemaMap[p]; ok {
    60  			fmt.Printf("Predicate %q already exists in schema\n", p)
    61  			continue
    62  		}
    63  		s.checkAndSetInitialSchema(x.ParseNamespace(p))
    64  		s.schemaMap[p] = sch
    65  	}
    66  
    67  	return s
    68  }
    69  
    70  func (s *schemaStore) getSchema(pred string) *pb.SchemaUpdate {
    71  	s.RLock()
    72  	defer s.RUnlock()
    73  	return s.schemaMap[pred]
    74  }
    75  
    76  func (s *schemaStore) setSchemaAsList(pred string) {
    77  	s.Lock()
    78  	defer s.Unlock()
    79  	sch, ok := s.schemaMap[pred]
    80  	if !ok {
    81  		return
    82  	}
    83  	sch.List = true
    84  }
    85  
    86  // checkAndSetInitialSchema initializes the schema for namespace if it does not already exist.
    87  func (s *schemaStore) checkAndSetInitialSchema(namespace uint64) {
    88  	if _, ok := s.namespaces.Load(namespace); ok {
    89  		return
    90  	}
    91  	s.Lock()
    92  	defer s.Unlock()
    93  
    94  	if _, ok := s.namespaces.Load(namespace); ok {
    95  		return
    96  	}
    97  	// Load all initial predicates. Some predicates that might not be used when
    98  	// the alpha is started (e.g ACL predicates) might be included but it's
    99  	// better to include them in case the input data contains triples with these
   100  	// predicates.
   101  	for _, update := range schema.CompleteInitialSchema(namespace) {
   102  		s.schemaMap[update.Predicate] = update
   103  	}
   104  	s.types = append(s.types, schema.CompleteInitialTypes(namespace)...)
   105  
   106  	if s.opt.StoreXids {
   107  		s.schemaMap[x.NamespaceAttr(namespace, "xid")] = &pb.SchemaUpdate{
   108  			ValueType: pb.Posting_STRING,
   109  			Tokenizer: []string{"hash"},
   110  		}
   111  	}
   112  	s.namespaces.Store(namespace, struct{}{})
   113  }
   114  
   115  func (s *schemaStore) validateType(de *pb.DirectedEdge, objectIsUID bool) {
   116  	if objectIsUID {
   117  		de.ValueType = pb.Posting_UID
   118  	}
   119  
   120  	s.RLock()
   121  	sch, ok := s.schemaMap[de.Attr]
   122  	s.RUnlock()
   123  	if !ok {
   124  		s.Lock()
   125  		sch, ok = s.schemaMap[de.Attr]
   126  		if !ok {
   127  			sch = &pb.SchemaUpdate{ValueType: de.ValueType}
   128  			if objectIsUID {
   129  				sch.List = true
   130  			}
   131  			s.schemaMap[de.Attr] = sch
   132  		}
   133  		s.Unlock()
   134  	}
   135  
   136  	err := wk.ValidateAndConvert(de, sch)
   137  	if err != nil {
   138  		log.Fatalf("RDF doesn't match schema: %v", err)
   139  	}
   140  }
   141  
   142  func (s *schemaStore) getPredicates(db *badger.DB) []string {
   143  	txn := db.NewTransactionAt(math.MaxUint64, false)
   144  	defer txn.Discard()
   145  
   146  	opts := badger.DefaultIteratorOptions
   147  	opts.PrefetchValues = false
   148  	itr := txn.NewIterator(opts)
   149  	defer itr.Close()
   150  
   151  	m := make(map[string]struct{})
   152  	for itr.Rewind(); itr.Valid(); {
   153  		item := itr.Item()
   154  		pk, err := x.Parse(item.Key())
   155  		x.Check(err)
   156  		m[pk.Attr] = struct{}{}
   157  		itr.Seek(pk.SkipPredicate())
   158  		continue
   159  	}
   160  
   161  	var preds []string
   162  	for pred := range m {
   163  		preds = append(preds, pred)
   164  	}
   165  	return preds
   166  }
   167  
   168  func (s *schemaStore) write(db *badger.DB, preds []string) {
   169  	w := posting.NewTxnWriter(db)
   170  	for _, pred := range preds {
   171  		sch, ok := s.schemaMap[pred]
   172  		if !ok {
   173  			continue
   174  		}
   175  		k := x.SchemaKey(pred)
   176  		v, err := sch.Marshal()
   177  		x.Check(err)
   178  		// Write schema and types always at timestamp 1, s.state.writeTs may not be equal to 1
   179  		// if bulk loader was restarted or other similar scenarios.
   180  		x.Check(w.SetAt(k, v, posting.BitSchemaPosting, 1))
   181  	}
   182  
   183  	// Write all the types as all groups should have access to all the types.
   184  	for _, typ := range s.types {
   185  		k := x.TypeKey(typ.TypeName)
   186  		v, err := typ.Marshal()
   187  		x.Check(err)
   188  		x.Check(w.SetAt(k, v, posting.BitSchemaPosting, 1))
   189  	}
   190  
   191  	x.Check(w.Flush())
   192  }