github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/dgraph/cmd/bulk/schema.go (about) 1 /* 2 * Copyright 2017-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package bulk 18 19 import ( 20 "fmt" 21 "log" 22 "math" 23 "sync" 24 25 "github.com/dgraph-io/badger" 26 "github.com/dgraph-io/dgraph/posting" 27 "github.com/dgraph-io/dgraph/protos/pb" 28 "github.com/dgraph-io/dgraph/schema" 29 wk "github.com/dgraph-io/dgraph/worker" 30 "github.com/dgraph-io/dgraph/x" 31 ) 32 33 type schemaStore struct { 34 sync.RWMutex 35 schemaMap map[string]*pb.SchemaUpdate 36 types []*pb.TypeUpdate 37 *state 38 } 39 40 func newSchemaStore(initial *schema.ParsedSchema, opt options, state *state) *schemaStore { 41 s := &schemaStore{ 42 schemaMap: map[string]*pb.SchemaUpdate{}, 43 state: state, 44 } 45 46 // Load all initial predicates. Some predicates that might not be used when 47 // the alpha is started (e.g ACL predicates) might be included but it's 48 // better to include them in case the input data contains triples with these 49 // predicates. 50 for _, update := range schema.CompleteInitialSchema() { 51 s.schemaMap[update.Predicate] = update 52 } 53 54 if opt.StoreXids { 55 s.schemaMap["xid"] = &pb.SchemaUpdate{ 56 ValueType: pb.Posting_STRING, 57 Tokenizer: []string{"hash"}, 58 } 59 } 60 61 for _, sch := range initial.Preds { 62 p := sch.Predicate 63 sch.Predicate = "" // Predicate is stored in the (badger) key, so not needed in the value. 64 if _, ok := s.schemaMap[p]; ok { 65 fmt.Printf("Predicate %q already exists in schema\n", p) 66 continue 67 } 68 s.schemaMap[p] = sch 69 } 70 71 s.types = initial.Types 72 73 return s 74 } 75 76 func (s *schemaStore) getSchema(pred string) *pb.SchemaUpdate { 77 s.RLock() 78 defer s.RUnlock() 79 return s.schemaMap[pred] 80 } 81 82 func (s *schemaStore) setSchemaAsList(pred string) { 83 s.Lock() 84 defer s.Unlock() 85 schema, ok := s.schemaMap[pred] 86 if !ok { 87 return 88 } 89 schema.List = true 90 } 91 92 func (s *schemaStore) validateType(de *pb.DirectedEdge, objectIsUID bool) { 93 if objectIsUID { 94 de.ValueType = pb.Posting_UID 95 } 96 97 s.RLock() 98 sch, ok := s.schemaMap[de.Attr] 99 s.RUnlock() 100 if !ok { 101 s.Lock() 102 sch, ok = s.schemaMap[de.Attr] 103 if !ok { 104 sch = &pb.SchemaUpdate{ValueType: de.ValueType} 105 if objectIsUID { 106 sch.List = true 107 } 108 s.schemaMap[de.Attr] = sch 109 } 110 s.Unlock() 111 } 112 113 err := wk.ValidateAndConvert(de, sch) 114 if err != nil { 115 log.Fatalf("RDF doesn't match schema: %v", err) 116 } 117 } 118 119 func (s *schemaStore) getPredicates(db *badger.DB) []string { 120 txn := db.NewTransactionAt(math.MaxUint64, false) 121 defer txn.Discard() 122 123 opts := badger.DefaultIteratorOptions 124 opts.PrefetchValues = false 125 itr := txn.NewIterator(opts) 126 defer itr.Close() 127 128 m := make(map[string]struct{}) 129 for itr.Rewind(); itr.Valid(); { 130 item := itr.Item() 131 pk, err := x.Parse(item.Key()) 132 x.Check(err) 133 m[pk.Attr] = struct{}{} 134 itr.Seek(pk.SkipPredicate()) 135 continue 136 } 137 138 var preds []string 139 for pred := range m { 140 preds = append(preds, pred) 141 } 142 return preds 143 } 144 145 func (s *schemaStore) write(db *badger.DB, preds []string) { 146 txn := db.NewTransactionAt(math.MaxUint64, true) 147 defer txn.Discard() 148 for _, pred := range preds { 149 sch, ok := s.schemaMap[pred] 150 if !ok { 151 continue 152 } 153 k := x.SchemaKey(pred) 154 v, err := sch.Marshal() 155 x.Check(err) 156 x.Check(txn.SetEntry(&badger.Entry{ 157 Key: k, 158 Value: v, 159 UserMeta: posting.BitSchemaPosting})) 160 } 161 162 // Write all the types as all groups should have access to all the types. 163 for _, typ := range s.types { 164 k := x.TypeKey(typ.TypeName) 165 v, err := typ.Marshal() 166 x.Check(err) 167 x.Check(txn.SetEntry(&badger.Entry{ 168 Key: k, 169 Value: v, 170 UserMeta: posting.BitSchemaPosting, 171 })) 172 } 173 174 // Write schema always at timestamp 1, s.state.writeTs may not be equal to 1 175 // if bulk loader was restarted or other similar scenarios. 176 x.Check(txn.CommitAt(1, nil)) 177 }