github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/bulk/schema.go (about) 1 /* 2 * Copyright 2017-2022 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package bulk 18 19 import ( 20 "fmt" 21 "log" 22 "math" 23 "sync" 24 25 "github.com/dgraph-io/badger/v3" 26 "github.com/dgraph-io/dgraph/posting" 27 "github.com/dgraph-io/dgraph/protos/pb" 28 "github.com/dgraph-io/dgraph/schema" 29 wk "github.com/dgraph-io/dgraph/worker" 30 "github.com/dgraph-io/dgraph/x" 31 ) 32 33 type schemaStore struct { 34 sync.RWMutex 35 schemaMap map[string]*pb.SchemaUpdate 36 types []*pb.TypeUpdate 37 *state 38 } 39 40 func newSchemaStore(initial *schema.ParsedSchema, opt *options, state *state) *schemaStore { 41 if opt == nil { 42 log.Fatalf("Cannot create schema store with nil options.") 43 } 44 45 s := &schemaStore{ 46 schemaMap: map[string]*pb.SchemaUpdate{}, 47 state: state, 48 } 49 50 // Initialize only for the default namespace. Initialization for other namespaces will be done 51 // whenever we see data for a new namespace. 52 s.checkAndSetInitialSchema(x.GalaxyNamespace) 53 54 s.types = initial.Types 55 // This is from the schema read from the schema file. 56 for _, sch := range initial.Preds { 57 p := sch.Predicate 58 sch.Predicate = "" // Predicate is stored in the (badger) key, so not needed in the value. 59 if _, ok := s.schemaMap[p]; ok { 60 fmt.Printf("Predicate %q already exists in schema\n", p) 61 continue 62 } 63 s.checkAndSetInitialSchema(x.ParseNamespace(p)) 64 s.schemaMap[p] = sch 65 } 66 67 return s 68 } 69 70 func (s *schemaStore) getSchema(pred string) *pb.SchemaUpdate { 71 s.RLock() 72 defer s.RUnlock() 73 return s.schemaMap[pred] 74 } 75 76 func (s *schemaStore) setSchemaAsList(pred string) { 77 s.Lock() 78 defer s.Unlock() 79 sch, ok := s.schemaMap[pred] 80 if !ok { 81 return 82 } 83 sch.List = true 84 } 85 86 // checkAndSetInitialSchema initializes the schema for namespace if it does not already exist. 87 func (s *schemaStore) checkAndSetInitialSchema(namespace uint64) { 88 if _, ok := s.namespaces.Load(namespace); ok { 89 return 90 } 91 s.Lock() 92 defer s.Unlock() 93 94 if _, ok := s.namespaces.Load(namespace); ok { 95 return 96 } 97 // Load all initial predicates. Some predicates that might not be used when 98 // the alpha is started (e.g ACL predicates) might be included but it's 99 // better to include them in case the input data contains triples with these 100 // predicates. 101 for _, update := range schema.CompleteInitialSchema(namespace) { 102 s.schemaMap[update.Predicate] = update 103 } 104 s.types = append(s.types, schema.CompleteInitialTypes(namespace)...) 105 106 if s.opt.StoreXids { 107 s.schemaMap[x.NamespaceAttr(namespace, "xid")] = &pb.SchemaUpdate{ 108 ValueType: pb.Posting_STRING, 109 Tokenizer: []string{"hash"}, 110 } 111 } 112 s.namespaces.Store(namespace, struct{}{}) 113 } 114 115 func (s *schemaStore) validateType(de *pb.DirectedEdge, objectIsUID bool) { 116 if objectIsUID { 117 de.ValueType = pb.Posting_UID 118 } 119 120 s.RLock() 121 sch, ok := s.schemaMap[de.Attr] 122 s.RUnlock() 123 if !ok { 124 s.Lock() 125 sch, ok = s.schemaMap[de.Attr] 126 if !ok { 127 sch = &pb.SchemaUpdate{ValueType: de.ValueType} 128 if objectIsUID { 129 sch.List = true 130 } 131 s.schemaMap[de.Attr] = sch 132 } 133 s.Unlock() 134 } 135 136 err := wk.ValidateAndConvert(de, sch) 137 if err != nil { 138 log.Fatalf("RDF doesn't match schema: %v", err) 139 } 140 } 141 142 func (s *schemaStore) getPredicates(db *badger.DB) []string { 143 txn := db.NewTransactionAt(math.MaxUint64, false) 144 defer txn.Discard() 145 146 opts := badger.DefaultIteratorOptions 147 opts.PrefetchValues = false 148 itr := txn.NewIterator(opts) 149 defer itr.Close() 150 151 m := make(map[string]struct{}) 152 for itr.Rewind(); itr.Valid(); { 153 item := itr.Item() 154 pk, err := x.Parse(item.Key()) 155 x.Check(err) 156 m[pk.Attr] = struct{}{} 157 itr.Seek(pk.SkipPredicate()) 158 continue 159 } 160 161 var preds []string 162 for pred := range m { 163 preds = append(preds, pred) 164 } 165 return preds 166 } 167 168 func (s *schemaStore) write(db *badger.DB, preds []string) { 169 w := posting.NewTxnWriter(db) 170 for _, pred := range preds { 171 sch, ok := s.schemaMap[pred] 172 if !ok { 173 continue 174 } 175 k := x.SchemaKey(pred) 176 v, err := sch.Marshal() 177 x.Check(err) 178 // Write schema and types always at timestamp 1, s.state.writeTs may not be equal to 1 179 // if bulk loader was restarted or other similar scenarios. 180 x.Check(w.SetAt(k, v, posting.BitSchemaPosting, 1)) 181 } 182 183 // Write all the types as all groups should have access to all the types. 184 for _, typ := range s.types { 185 k := x.TypeKey(typ.TypeName) 186 v, err := typ.Marshal() 187 x.Check(err) 188 x.Check(w.SetAt(k, v, posting.BitSchemaPosting, 1)) 189 } 190 191 x.Check(w.Flush()) 192 }