github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/schema/parse.go (about) 1 /* 2 * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package schema 18 19 import ( 20 "strings" 21 22 "github.com/dgraph-io/dgraph/lex" 23 "github.com/dgraph-io/dgraph/protos/pb" 24 "github.com/dgraph-io/dgraph/tok" 25 "github.com/dgraph-io/dgraph/types" 26 "github.com/dgraph-io/dgraph/x" 27 "github.com/pkg/errors" 28 ) 29 30 // ParseBytes parses the byte array which holds the schema. We will reset 31 // all the globals. 32 // Overwrites schema blindly - called only during initilization in testing 33 func ParseBytes(s []byte, gid uint32) (rerr error) { 34 if pstate == nil { 35 reset() 36 } 37 pstate.DeleteAll() 38 result, err := Parse(string(s)) 39 if err != nil { 40 return err 41 } 42 43 for _, update := range result.Preds { 44 State().Set(update.Predicate, *update) 45 } 46 return nil 47 } 48 49 func parseDirective(it *lex.ItemIterator, schema *pb.SchemaUpdate, t types.TypeID) error { 50 it.Next() 51 next := it.Item() 52 if next.Typ != itemText { 53 return next.Errorf("Missing directive name") 54 } 55 switch next.Val { 56 case "reverse": 57 if t != types.UidID { 58 return next.Errorf("Cannot reverse for non-UID type") 59 } 60 schema.Directive = pb.SchemaUpdate_REVERSE 61 case "index": 62 tokenizer, err := parseIndexDirective(it, schema.Predicate, t) 63 if err != nil { 64 return err 65 } 66 schema.Directive = pb.SchemaUpdate_INDEX 67 schema.Tokenizer = tokenizer 68 case "count": 69 schema.Count = true 70 case "upsert": 71 schema.Upsert = true 72 case "lang": 73 if t != types.StringID || schema.List { 74 return next.Errorf("@lang directive can only be specified for string type."+ 75 " Got: [%v] for attr: [%v]", t.Name(), schema.Predicate) 76 } 77 schema.Lang = true 78 default: 79 return next.Errorf("Invalid index specification") 80 } 81 it.Next() 82 83 return nil 84 } 85 86 func parseScalarPair(it *lex.ItemIterator, predicate string) (*pb.SchemaUpdate, error) { 87 it.Next() 88 next := it.Item() 89 switch { 90 // This check might seem redundant but it's necessary. We have two possibilities, 91 // 1) that the schema is of form: name@en: string . 92 // 93 // 2) or this alternate form: <name@en>: string . 94 // 95 // The itemAt test invalidates 1) and string.Contains() tests for 2). We don't allow 96 // '@' in predicate names, so both forms are disallowed. Handling them here avoids 97 // messing with the lexer and IRI values. 98 case next.Typ == itemAt || strings.Contains(predicate, "@"): 99 return nil, next.Errorf("Invalid '@' in name") 100 case next.Typ != itemColon: 101 return nil, next.Errorf("Missing colon") 102 case !it.Next(): 103 return nil, next.Errorf("Invalid ending while trying to parse schema.") 104 } 105 next = it.Item() 106 schema := &pb.SchemaUpdate{Predicate: predicate} 107 // Could be list type. 108 if next.Typ == itemLeftSquare { 109 schema.List = true 110 if !it.Next() { 111 return nil, next.Errorf("Invalid ending while trying to parse schema.") 112 } 113 next = it.Item() 114 } 115 116 if next.Typ != itemText { 117 return nil, next.Errorf("Missing Type") 118 } 119 typ := strings.ToLower(next.Val) 120 // We ignore the case for types. 121 t, ok := types.TypeForName(typ) 122 if !ok { 123 return nil, next.Errorf("Undefined Type") 124 } 125 if schema.List { 126 if uint32(t) == uint32(types.PasswordID) || uint32(t) == uint32(types.BoolID) { 127 return nil, next.Errorf("Unsupported type for list: [%s].", types.TypeID(t).Name()) 128 } 129 } 130 schema.ValueType = t.Enum() 131 132 // Check for index / reverse. 133 it.Next() 134 next = it.Item() 135 if schema.List { 136 if next.Typ != itemRightSquare { 137 return nil, next.Errorf("Unclosed [ while parsing schema for: %s", predicate) 138 } 139 if !it.Next() { 140 return nil, next.Errorf("Invalid ending") 141 } 142 next = it.Item() 143 } 144 145 for { 146 if next.Typ != itemAt { 147 break 148 } 149 if err := parseDirective(it, schema, t); err != nil { 150 return nil, err 151 } 152 next = it.Item() 153 } 154 155 if next.Typ != itemDot { 156 return nil, next.Errorf("Invalid ending") 157 } 158 it.Next() 159 next = it.Item() 160 if next.Typ == lex.ItemEOF { 161 it.Prev() 162 return schema, nil 163 } 164 if next.Typ != itemNewLine { 165 return nil, next.Errorf("Invalid ending") 166 } 167 return schema, nil 168 } 169 170 // parseIndexDirective works on "@index" or "@index(customtokenizer)". 171 func parseIndexDirective(it *lex.ItemIterator, predicate string, 172 typ types.TypeID) ([]string, error) { 173 var tokenizers []string 174 var seen = make(map[string]bool) 175 var seenSortableTok bool 176 177 if typ == types.UidID || typ == types.DefaultID || typ == types.PasswordID { 178 return tokenizers, it.Item().Errorf("Indexing not allowed on predicate %s of type %s", 179 predicate, typ.Name()) 180 } 181 if !it.Next() { 182 // Nothing to read. 183 return []string{}, it.Item().Errorf("Invalid ending.") 184 } 185 next := it.Item() 186 if next.Typ != itemLeftRound { 187 it.Prev() // Backup. 188 return []string{}, it.Item().Errorf("Require type of tokenizer for pred: %s for indexing.", 189 predicate) 190 } 191 192 expectArg := true 193 // Look for tokenizers. 194 for { 195 it.Next() 196 next = it.Item() 197 if next.Typ == itemRightRound { 198 break 199 } 200 if next.Typ == itemComma { 201 if expectArg { 202 return nil, next.Errorf("Expected a tokenizer but got comma") 203 } 204 expectArg = true 205 continue 206 } 207 if next.Typ != itemText { 208 return tokenizers, next.Errorf("Expected directive arg but got: %v", next.Val) 209 } 210 if !expectArg { 211 return tokenizers, next.Errorf("Expected a comma but got: %v", next) 212 } 213 // Look for custom tokenizer. 214 tokenizer, has := tok.GetTokenizer(strings.ToLower(next.Val)) 215 if !has { 216 return tokenizers, next.Errorf("Invalid tokenizer %s", next.Val) 217 } 218 tokenizerType, ok := types.TypeForName(tokenizer.Type()) 219 x.AssertTrue(ok) // Type is validated during tokenizer loading. 220 if tokenizerType != typ { 221 return tokenizers, 222 next.Errorf("Tokenizer: %s isn't valid for predicate: %s of type: %s", 223 tokenizer.Name(), predicate, typ.Name()) 224 } 225 if _, found := seen[tokenizer.Name()]; found { 226 return tokenizers, next.Errorf("Duplicate tokenizers defined for pred %v", 227 predicate) 228 } 229 if tokenizer.IsSortable() { 230 if seenSortableTok { 231 return nil, next.Errorf("More than one sortable index encountered for: %v", 232 predicate) 233 } 234 seenSortableTok = true 235 } 236 tokenizers = append(tokenizers, tokenizer.Name()) 237 seen[tokenizer.Name()] = true 238 expectArg = false 239 } 240 return tokenizers, nil 241 } 242 243 // resolveTokenizers resolves default tokenizers and verifies tokenizers definitions. 244 func resolveTokenizers(updates []*pb.SchemaUpdate) error { 245 for _, schema := range updates { 246 typ := types.TypeID(schema.ValueType) 247 248 if (typ == types.UidID || typ == types.DefaultID || typ == types.PasswordID) && 249 schema.Directive == pb.SchemaUpdate_INDEX { 250 return errors.Errorf("Indexing not allowed on predicate %s of type %s", 251 schema.Predicate, typ.Name()) 252 } 253 254 if typ == types.UidID { 255 continue 256 } 257 258 if len(schema.Tokenizer) == 0 && schema.Directive == pb.SchemaUpdate_INDEX { 259 return errors.Errorf("Require type of tokenizer for pred: %s of type: %s for indexing.", 260 schema.Predicate, typ.Name()) 261 } else if len(schema.Tokenizer) > 0 && schema.Directive != pb.SchemaUpdate_INDEX { 262 return errors.Errorf("Tokenizers present without indexing on attr %s", schema.Predicate) 263 } 264 // check for valid tokeniser types and duplicates 265 var seen = make(map[string]bool) 266 var seenSortableTok bool 267 for _, t := range schema.Tokenizer { 268 tokenizer, has := tok.GetTokenizer(t) 269 if !has { 270 return errors.Errorf("Invalid tokenizer %s", t) 271 } 272 tokenizerType, ok := types.TypeForName(tokenizer.Type()) 273 x.AssertTrue(ok) // Type is validated during tokenizer loading. 274 if tokenizerType != typ { 275 return errors.Errorf("Tokenizer: %s isn't valid for predicate: %s of type: %s", 276 tokenizer.Name(), schema.Predicate, typ.Name()) 277 } 278 if _, ok := seen[tokenizer.Name()]; !ok { 279 seen[tokenizer.Name()] = true 280 } else { 281 return errors.Errorf("Duplicate tokenizers present for attr %s", schema.Predicate) 282 } 283 if tokenizer.IsSortable() { 284 if seenSortableTok { 285 return errors.Errorf("More than one sortable index encountered for: %v", 286 schema.Predicate) 287 } 288 seenSortableTok = true 289 } 290 } 291 } 292 return nil 293 } 294 295 func parseTypeDeclaration(it *lex.ItemIterator) (*pb.TypeUpdate, error) { 296 // Iterator is currently on the token corresponding to the keyword type. 297 if it.Item().Typ != itemText || it.Item().Val != "type" { 298 return nil, it.Item().Errorf("Expected type keyword. Got %v", it.Item().Val) 299 } 300 301 it.Next() 302 if it.Item().Typ != itemText { 303 return nil, it.Item().Errorf("Expected type name. Got %v", it.Item().Val) 304 } 305 typeUpdate := &pb.TypeUpdate{TypeName: it.Item().Val} 306 307 it.Next() 308 if it.Item().Typ != itemLeftCurl { 309 return nil, it.Item().Errorf("Expected {. Got %v", it.Item().Val) 310 } 311 312 var fields []*pb.SchemaUpdate 313 for it.Next() { 314 item := it.Item() 315 316 switch item.Typ { 317 case itemRightCurl: 318 it.Next() 319 if it.Item().Typ != itemNewLine { 320 return nil, it.Item().Errorf("Expected new line after type declaration. Got %v", 321 it.Item().Val) 322 } 323 324 typeUpdate.Fields = fields 325 return typeUpdate, nil 326 case itemText: 327 field, err := parseTypeField(it) 328 if err != nil { 329 return nil, err 330 } 331 fields = append(fields, field) 332 case itemNewLine: 333 // Ignore empty lines. 334 default: 335 return nil, it.Item().Errorf("Unexpected token. Got %v", it.Item().Val) 336 } 337 } 338 return nil, errors.Errorf("Shouldn't reach here.") 339 } 340 341 func parseTypeField(it *lex.ItemIterator) (*pb.SchemaUpdate, error) { 342 field := &pb.SchemaUpdate{Predicate: it.Item().Val} 343 var list bool 344 345 it.Next() 346 if it.Item().Typ != itemColon { 347 return nil, it.Item().Errorf("Missing colon in type declaration. Got %v", it.Item().Val) 348 } 349 350 it.Next() 351 if it.Item().Typ == itemLeftSquare { 352 list = true 353 it.Next() 354 } 355 356 if it.Item().Typ != itemText { 357 return nil, it.Item().Errorf("Missing field type in type declaration. Got %v", 358 it.Item().Val) 359 } 360 field.ValueType = getType(it.Item().Val) 361 if field.ValueType == pb.Posting_OBJECT { 362 field.ObjectTypeName = it.Item().Val 363 } 364 365 it.Next() 366 if it.Item().Typ == itemExclamationMark { 367 field.NonNullable = true 368 it.Next() 369 } 370 371 if list { 372 if it.Item().Typ != itemRightSquare { 373 return nil, it.Item().Errorf("Expected matching square bracket. Got %v", it.Item().Val) 374 } 375 field.List = true 376 it.Next() 377 378 if it.Item().Typ == itemExclamationMark { 379 field.NonNullableList = true 380 it.Next() 381 } 382 } 383 384 if it.Item().Typ != itemNewLine { 385 return nil, it.Item().Errorf("Expected new line after field declaration. Got %v", it.Item().Val) 386 } 387 388 return field, nil 389 } 390 391 func getType(typeName string) pb.Posting_ValType { 392 typ, ok := types.TypeForName(strings.ToLower(typeName)) 393 if ok { 394 return pb.Posting_ValType(typ) 395 } 396 397 return pb.Posting_OBJECT 398 } 399 400 // ParsedSchema represents the parsed schema and type updates. 401 type ParsedSchema struct { 402 Preds []*pb.SchemaUpdate 403 Types []*pb.TypeUpdate 404 } 405 406 func isTypeDeclaration(item lex.Item, it *lex.ItemIterator) bool { 407 if item.Val != "type" { 408 return false 409 } 410 411 nextItems, err := it.Peek(2) 412 switch { 413 case err != nil || len(nextItems) != 2: 414 return false 415 416 case nextItems[0].Typ != itemText: 417 return false 418 419 case nextItems[1].Typ != itemLeftCurl: 420 return false 421 } 422 423 return true 424 } 425 426 // Parse parses a schema string and returns the schema representation for it. 427 func Parse(s string) (*ParsedSchema, error) { 428 var result ParsedSchema 429 430 var l lex.Lexer 431 l.Reset(s) 432 l.Run(lexText) 433 if err := l.ValidateResult(); err != nil { 434 return nil, err 435 } 436 it := l.NewIterator() 437 for it.Next() { 438 item := it.Item() 439 switch item.Typ { 440 case lex.ItemEOF: 441 if err := resolveTokenizers(result.Preds); err != nil { 442 return nil, errors.Wrapf(err, "failed to enrich schema") 443 } 444 return &result, nil 445 446 case itemText: 447 if isTypeDeclaration(item, it) { 448 typeUpdate, err := parseTypeDeclaration(it) 449 if err != nil { 450 return nil, err 451 } 452 result.Types = append(result.Types, typeUpdate) 453 continue 454 } 455 456 schema, err := parseScalarPair(it, item.Val) 457 if err != nil { 458 return nil, err 459 } 460 result.Preds = append(result.Preds, schema) 461 case itemNewLine: 462 // pass empty line 463 464 default: 465 return nil, it.Item().Errorf("Unexpected token: %v while parsing schema", item) 466 } 467 } 468 return nil, errors.Errorf("Shouldn't reach here") 469 }