github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/schema/schema.go (about) 1 /* 2 * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package schema 18 19 import ( 20 "bytes" 21 "encoding/hex" 22 "fmt" 23 "sync" 24 25 "github.com/dgraph-io/badger" 26 "github.com/golang/glog" 27 "github.com/golang/protobuf/proto" 28 "golang.org/x/net/trace" 29 30 "github.com/dgraph-io/dgraph/protos/pb" 31 "github.com/dgraph-io/dgraph/tok" 32 "github.com/dgraph-io/dgraph/types" 33 "github.com/dgraph-io/dgraph/x" 34 "github.com/pkg/errors" 35 ) 36 37 var ( 38 pstate *state 39 pstore *badger.DB 40 ) 41 42 func (s *state) init() { 43 s.predicate = make(map[string]*pb.SchemaUpdate) 44 s.types = make(map[string]*pb.TypeUpdate) 45 s.elog = trace.NewEventLog("Dgraph", "Schema") 46 } 47 48 type state struct { 49 sync.RWMutex 50 // Map containing predicate to type information. 51 predicate map[string]*pb.SchemaUpdate 52 types map[string]*pb.TypeUpdate 53 elog trace.EventLog 54 } 55 56 // State returns the struct holding the current schema. 57 func State() *state { 58 return pstate 59 } 60 61 func (s *state) DeleteAll() { 62 s.Lock() 63 defer s.Unlock() 64 65 for pred := range s.predicate { 66 delete(s.predicate, pred) 67 } 68 69 for typ := range s.types { 70 delete(s.types, typ) 71 } 72 } 73 74 // Delete updates the schema in memory and disk 75 func (s *state) Delete(attr string) error { 76 s.Lock() 77 defer s.Unlock() 78 79 glog.Infof("Deleting schema for predicate: [%s]", attr) 80 txn := pstore.NewTransactionAt(1, true) 81 if err := txn.Delete(x.SchemaKey(attr)); err != nil { 82 return err 83 } 84 // Delete is called rarely so sync write should be fine. 85 if err := txn.CommitAt(1, nil); err != nil { 86 return err 87 } 88 89 delete(s.predicate, attr) 90 return nil 91 } 92 93 // DeleteType updates the schema in memory and disk 94 func (s *state) DeleteType(typeName string) error { 95 s.Lock() 96 defer s.Unlock() 97 98 glog.Infof("Deleting type definition for type: [%s]", typeName) 99 txn := pstore.NewTransactionAt(1, true) 100 if err := txn.Delete(x.TypeKey(typeName)); err != nil { 101 return err 102 } 103 // Delete is called rarely so sync write should be fine. 104 if err := txn.CommitAt(1, nil); err != nil { 105 return err 106 } 107 108 delete(s.types, typeName) 109 return nil 110 } 111 112 func logUpdate(schema pb.SchemaUpdate, pred string) string { 113 typ := types.TypeID(schema.ValueType).Name() 114 if schema.List { 115 typ = fmt.Sprintf("[%s]", typ) 116 } 117 return fmt.Sprintf("Setting schema for attr %s: %v, tokenizer: %v, directive: %v, count: %v\n", 118 pred, typ, schema.Tokenizer, schema.Directive, schema.Count) 119 } 120 121 func logTypeUpdate(typ pb.TypeUpdate, typeName string) string { 122 return fmt.Sprintf("Setting type definition for type %s: %v\n", typeName, typ) 123 } 124 125 // Set sets the schema for the given predicate in memory. 126 // Schema mutations must flow through the update function, which are synced to the db. 127 func (s *state) Set(pred string, schema pb.SchemaUpdate) { 128 s.Lock() 129 defer s.Unlock() 130 s.predicate[pred] = &schema 131 s.elog.Printf(logUpdate(schema, pred)) 132 } 133 134 // SetType sets the type for the given predicate in memory. 135 // schema mutations must flow through the update function, which are synced to the db. 136 func (s *state) SetType(typeName string, typ pb.TypeUpdate) { 137 s.Lock() 138 defer s.Unlock() 139 s.types[typeName] = &typ 140 s.elog.Printf(logTypeUpdate(typ, typeName)) 141 } 142 143 // Get gets the schema for the given predicate. 144 func (s *state) Get(pred string) (pb.SchemaUpdate, bool) { 145 s.RLock() 146 defer s.RUnlock() 147 schema, has := s.predicate[pred] 148 if !has { 149 return pb.SchemaUpdate{}, false 150 } 151 return *schema, true 152 } 153 154 // GetType gets the type definition for the given type name. 155 func (s *state) GetType(typeName string) (pb.TypeUpdate, bool) { 156 s.RLock() 157 defer s.RUnlock() 158 typ, has := s.types[typeName] 159 if !has { 160 return pb.TypeUpdate{}, false 161 } 162 return *typ, true 163 } 164 165 // TypeOf returns the schema type of predicate 166 func (s *state) TypeOf(pred string) (types.TypeID, error) { 167 s.RLock() 168 defer s.RUnlock() 169 if schema, ok := s.predicate[pred]; ok { 170 return types.TypeID(schema.ValueType), nil 171 } 172 return types.UndefinedID, errors.Errorf("Schema not defined for predicate: %v.", pred) 173 } 174 175 // IsIndexed returns whether the predicate is indexed or not 176 func (s *state) IsIndexed(pred string) bool { 177 s.RLock() 178 defer s.RUnlock() 179 if schema, ok := s.predicate[pred]; ok { 180 return len(schema.Tokenizer) > 0 181 } 182 return false 183 } 184 185 // IndexedFields returns the list of indexed fields 186 func (s *state) IndexedFields() []string { 187 s.RLock() 188 defer s.RUnlock() 189 var out []string 190 for k, v := range s.predicate { 191 if len(v.Tokenizer) > 0 { 192 out = append(out, k) 193 } 194 } 195 return out 196 } 197 198 // Predicates returns the list of predicates for given group 199 func (s *state) Predicates() []string { 200 s.RLock() 201 defer s.RUnlock() 202 var out []string 203 for k := range s.predicate { 204 out = append(out, k) 205 } 206 return out 207 } 208 209 // Types returns the list of types. 210 func (s *state) Types() []string { 211 s.RLock() 212 defer s.RUnlock() 213 var out []string 214 for k := range s.types { 215 out = append(out, k) 216 } 217 return out 218 } 219 220 // Tokenizer returns the tokenizer for given predicate 221 func (s *state) Tokenizer(pred string) []tok.Tokenizer { 222 s.RLock() 223 defer s.RUnlock() 224 schema, ok := s.predicate[pred] 225 x.AssertTruef(ok, "schema state not found for %s", pred) 226 var tokenizers []tok.Tokenizer 227 for _, it := range schema.Tokenizer { 228 t, found := tok.GetTokenizer(it) 229 x.AssertTruef(found, "Invalid tokenizer %s", it) 230 tokenizers = append(tokenizers, t) 231 } 232 return tokenizers 233 } 234 235 // TokenizerNames returns the tokenizer names for given predicate 236 func (s *state) TokenizerNames(pred string) []string { 237 var names []string 238 tokenizers := s.Tokenizer(pred) 239 for _, t := range tokenizers { 240 names = append(names, t.Name()) 241 } 242 return names 243 } 244 245 // HasTokenizer is a convenience func that checks if a given tokenizer is found in pred. 246 // Returns true if found, else false. 247 func (s *state) HasTokenizer(id byte, pred string) bool { 248 for _, t := range s.Tokenizer(pred) { 249 if t.Identifier() == id { 250 return true 251 } 252 } 253 return false 254 } 255 256 // IsReversed returns whether the predicate has reverse edge or not 257 func (s *state) IsReversed(pred string) bool { 258 s.RLock() 259 defer s.RUnlock() 260 if schema, ok := s.predicate[pred]; ok { 261 return schema.Directive == pb.SchemaUpdate_REVERSE 262 } 263 return false 264 } 265 266 // HasCount returns whether we want to mantain a count index for the given predicate or not. 267 func (s *state) HasCount(pred string) bool { 268 s.RLock() 269 defer s.RUnlock() 270 if schema, ok := s.predicate[pred]; ok { 271 return schema.Count 272 } 273 return false 274 } 275 276 // IsList returns whether the predicate is of list type. 277 func (s *state) IsList(pred string) bool { 278 s.RLock() 279 defer s.RUnlock() 280 if schema, ok := s.predicate[pred]; ok { 281 return schema.List 282 } 283 return false 284 } 285 286 func (s *state) HasUpsert(pred string) bool { 287 s.RLock() 288 defer s.RUnlock() 289 if schema, ok := s.predicate[pred]; ok { 290 return schema.Upsert 291 } 292 return false 293 } 294 295 func (s *state) HasLang(pred string) bool { 296 s.RLock() 297 defer s.RUnlock() 298 if schema, ok := s.predicate[pred]; ok { 299 return schema.Lang 300 } 301 return false 302 } 303 304 // Init resets the schema state, setting the underlying DB to the given pointer. 305 func Init(ps *badger.DB) { 306 pstore = ps 307 reset() 308 } 309 310 // Load reads the schema for the given predicate from the DB. 311 func Load(predicate string) error { 312 if len(predicate) == 0 { 313 return errors.Errorf("Empty predicate") 314 } 315 key := x.SchemaKey(predicate) 316 txn := pstore.NewTransactionAt(1, false) 317 defer txn.Discard() 318 item, err := txn.Get(key) 319 if err == badger.ErrKeyNotFound { 320 return nil 321 } 322 if err != nil { 323 return err 324 } 325 var s pb.SchemaUpdate 326 err = item.Value(func(val []byte) error { 327 x.Check(s.Unmarshal(val)) 328 return nil 329 }) 330 if err != nil { 331 return err 332 } 333 State().Set(predicate, s) 334 State().elog.Printf(logUpdate(s, predicate)) 335 glog.Infoln(logUpdate(s, predicate)) 336 return nil 337 } 338 339 // LoadFromDb reads schema information from db and stores it in memory 340 func LoadFromDb() error { 341 if err := LoadSchemaFromDb(); err != nil { 342 return err 343 } 344 return LoadTypesFromDb() 345 } 346 347 // LoadSchemaFromDb iterates through the DB and loads all the stored schema updates. 348 func LoadSchemaFromDb() error { 349 prefix := x.SchemaPrefix() 350 txn := pstore.NewTransactionAt(1, false) 351 defer txn.Discard() 352 itr := txn.NewIterator(badger.DefaultIteratorOptions) // Need values, reversed=false. 353 defer itr.Close() 354 355 for itr.Seek(prefix); itr.Valid(); itr.Next() { 356 item := itr.Item() 357 key := item.Key() 358 if !bytes.HasPrefix(key, prefix) { 359 break 360 } 361 pk, err := x.Parse(key) 362 if err != nil { 363 glog.Errorf("Error while parsing key %s: %v", hex.Dump(key), err) 364 continue 365 } 366 attr := pk.Attr 367 var s pb.SchemaUpdate 368 err = item.Value(func(val []byte) error { 369 if len(val) == 0 { 370 s = pb.SchemaUpdate{Predicate: attr, ValueType: pb.Posting_DEFAULT} 371 } 372 x.Checkf(s.Unmarshal(val), "Error while loading schema from db") 373 State().Set(attr, s) 374 return nil 375 }) 376 if err != nil { 377 return err 378 } 379 } 380 return nil 381 } 382 383 // LoadTypesFromDb iterates through the DB and loads all the stored type updates. 384 func LoadTypesFromDb() error { 385 prefix := x.TypePrefix() 386 txn := pstore.NewTransactionAt(1, false) 387 defer txn.Discard() 388 itr := txn.NewIterator(badger.DefaultIteratorOptions) // Need values, reversed=false. 389 defer itr.Close() 390 391 for itr.Seek(prefix); itr.Valid(); itr.Next() { 392 item := itr.Item() 393 key := item.Key() 394 if !bytes.HasPrefix(key, prefix) { 395 break 396 } 397 pk, err := x.Parse(key) 398 if err != nil { 399 glog.Errorf("Error while parsing key %s: %v", hex.Dump(key), err) 400 continue 401 } 402 attr := pk.Attr 403 var t pb.TypeUpdate 404 err = item.Value(func(val []byte) error { 405 if len(val) == 0 { 406 t = pb.TypeUpdate{TypeName: attr} 407 } 408 x.Checkf(t.Unmarshal(val), "Error while loading types from db") 409 State().SetType(attr, t) 410 return nil 411 }) 412 if err != nil { 413 return err 414 } 415 } 416 return nil 417 } 418 419 // InitialSchema returns the schema updates to insert at the beginning of 420 // Dgraph's execution. It looks at the worker options to determine which 421 // attributes to insert. 422 func InitialSchema() []*pb.SchemaUpdate { 423 return initialSchemaInternal(false) 424 } 425 426 // CompleteInitialSchema returns all the schema updates regardless of the worker 427 // options. This is useful in situations where the worker options are not known 428 // in advance and it's better to create all the reserved predicates and remove 429 // them later than miss some of them. An example of such situation is during bulk 430 // loading. 431 func CompleteInitialSchema() []*pb.SchemaUpdate { 432 return initialSchemaInternal(true) 433 } 434 435 func initialSchemaInternal(all bool) []*pb.SchemaUpdate { 436 var initialSchema []*pb.SchemaUpdate 437 438 initialSchema = append(initialSchema, &pb.SchemaUpdate{ 439 Predicate: "dgraph.type", 440 ValueType: pb.Posting_STRING, 441 Directive: pb.SchemaUpdate_INDEX, 442 Tokenizer: []string{"exact"}, 443 List: true, 444 }) 445 446 if all || x.WorkerConfig.AclEnabled { 447 // propose the schema update for acl predicates 448 initialSchema = append(initialSchema, []*pb.SchemaUpdate{ 449 { 450 Predicate: "dgraph.xid", 451 ValueType: pb.Posting_STRING, 452 Directive: pb.SchemaUpdate_INDEX, 453 Upsert: true, 454 Tokenizer: []string{"exact"}, 455 }, 456 { 457 Predicate: "dgraph.password", 458 ValueType: pb.Posting_PASSWORD, 459 }, 460 { 461 Predicate: "dgraph.user.group", 462 Directive: pb.SchemaUpdate_REVERSE, 463 ValueType: pb.Posting_UID, 464 List: true, 465 }, 466 { 467 Predicate: "dgraph.group.acl", 468 ValueType: pb.Posting_STRING, 469 }}...) 470 } 471 472 return initialSchema 473 } 474 475 // IsReservedPredicateChanged returns true if the initial update for the reserved 476 // predicate pred is different than the passed update. 477 func IsReservedPredicateChanged(pred string, update *pb.SchemaUpdate) bool { 478 // Return false for non-reserved predicates. 479 if !x.IsReservedPredicate(pred) { 480 return false 481 } 482 483 initialSchema := CompleteInitialSchema() 484 for _, original := range initialSchema { 485 if original.Predicate != pred { 486 continue 487 } 488 return !proto.Equal(original, update) 489 } 490 return true 491 } 492 493 func reset() { 494 pstate = new(state) 495 pstate.init() 496 }