github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/schema/parse.go (about)

     1  /*
     2   * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package schema
    18  
    19  import (
    20  	"strings"
    21  
    22  	"github.com/dgraph-io/dgraph/lex"
    23  	"github.com/dgraph-io/dgraph/protos/pb"
    24  	"github.com/dgraph-io/dgraph/tok"
    25  	"github.com/dgraph-io/dgraph/types"
    26  	"github.com/dgraph-io/dgraph/x"
    27  	"github.com/pkg/errors"
    28  )
    29  
    30  // ParseBytes parses the byte array which holds the schema. We will reset
    31  // all the globals.
    32  // Overwrites schema blindly - called only during initilization in testing
    33  func ParseBytes(s []byte, gid uint32) (rerr error) {
    34  	if pstate == nil {
    35  		reset()
    36  	}
    37  	pstate.DeleteAll()
    38  	result, err := Parse(string(s))
    39  	if err != nil {
    40  		return err
    41  	}
    42  
    43  	for _, update := range result.Preds {
    44  		State().Set(update.Predicate, *update)
    45  	}
    46  	return nil
    47  }
    48  
    49  func parseDirective(it *lex.ItemIterator, schema *pb.SchemaUpdate, t types.TypeID) error {
    50  	it.Next()
    51  	next := it.Item()
    52  	if next.Typ != itemText {
    53  		return next.Errorf("Missing directive name")
    54  	}
    55  	switch next.Val {
    56  	case "reverse":
    57  		if t != types.UidID {
    58  			return next.Errorf("Cannot reverse for non-UID type")
    59  		}
    60  		schema.Directive = pb.SchemaUpdate_REVERSE
    61  	case "index":
    62  		tokenizer, err := parseIndexDirective(it, schema.Predicate, t)
    63  		if err != nil {
    64  			return err
    65  		}
    66  		schema.Directive = pb.SchemaUpdate_INDEX
    67  		schema.Tokenizer = tokenizer
    68  	case "count":
    69  		schema.Count = true
    70  	case "upsert":
    71  		schema.Upsert = true
    72  	case "lang":
    73  		if t != types.StringID || schema.List {
    74  			return next.Errorf("@lang directive can only be specified for string type."+
    75  				" Got: [%v] for attr: [%v]", t.Name(), schema.Predicate)
    76  		}
    77  		schema.Lang = true
    78  	default:
    79  		return next.Errorf("Invalid index specification")
    80  	}
    81  	it.Next()
    82  
    83  	return nil
    84  }
    85  
    86  func parseScalarPair(it *lex.ItemIterator, predicate string) (*pb.SchemaUpdate, error) {
    87  	it.Next()
    88  	next := it.Item()
    89  	switch {
    90  	// This check might seem redundant but it's necessary. We have two possibilities,
    91  	//   1) that the schema is of form: name@en: string .
    92  	//
    93  	//   2) or this alternate form: <name@en>: string .
    94  	//
    95  	// The itemAt test invalidates 1) and string.Contains() tests for 2). We don't allow
    96  	// '@' in predicate names, so both forms are disallowed. Handling them here avoids
    97  	// messing with the lexer and IRI values.
    98  	case next.Typ == itemAt || strings.Contains(predicate, "@"):
    99  		return nil, next.Errorf("Invalid '@' in name")
   100  	case next.Typ != itemColon:
   101  		return nil, next.Errorf("Missing colon")
   102  	case !it.Next():
   103  		return nil, next.Errorf("Invalid ending while trying to parse schema.")
   104  	}
   105  	next = it.Item()
   106  	schema := &pb.SchemaUpdate{Predicate: predicate}
   107  	// Could be list type.
   108  	if next.Typ == itemLeftSquare {
   109  		schema.List = true
   110  		if !it.Next() {
   111  			return nil, next.Errorf("Invalid ending while trying to parse schema.")
   112  		}
   113  		next = it.Item()
   114  	}
   115  
   116  	if next.Typ != itemText {
   117  		return nil, next.Errorf("Missing Type")
   118  	}
   119  	typ := strings.ToLower(next.Val)
   120  	// We ignore the case for types.
   121  	t, ok := types.TypeForName(typ)
   122  	if !ok {
   123  		return nil, next.Errorf("Undefined Type")
   124  	}
   125  	if schema.List {
   126  		if uint32(t) == uint32(types.PasswordID) || uint32(t) == uint32(types.BoolID) {
   127  			return nil, next.Errorf("Unsupported type for list: [%s].", types.TypeID(t).Name())
   128  		}
   129  	}
   130  	schema.ValueType = t.Enum()
   131  
   132  	// Check for index / reverse.
   133  	it.Next()
   134  	next = it.Item()
   135  	if schema.List {
   136  		if next.Typ != itemRightSquare {
   137  			return nil, next.Errorf("Unclosed [ while parsing schema for: %s", predicate)
   138  		}
   139  		if !it.Next() {
   140  			return nil, next.Errorf("Invalid ending")
   141  		}
   142  		next = it.Item()
   143  	}
   144  
   145  	for {
   146  		if next.Typ != itemAt {
   147  			break
   148  		}
   149  		if err := parseDirective(it, schema, t); err != nil {
   150  			return nil, err
   151  		}
   152  		next = it.Item()
   153  	}
   154  
   155  	if next.Typ != itemDot {
   156  		return nil, next.Errorf("Invalid ending")
   157  	}
   158  	it.Next()
   159  	next = it.Item()
   160  	if next.Typ == lex.ItemEOF {
   161  		it.Prev()
   162  		return schema, nil
   163  	}
   164  	if next.Typ != itemNewLine {
   165  		return nil, next.Errorf("Invalid ending")
   166  	}
   167  	return schema, nil
   168  }
   169  
   170  // parseIndexDirective works on "@index" or "@index(customtokenizer)".
   171  func parseIndexDirective(it *lex.ItemIterator, predicate string,
   172  	typ types.TypeID) ([]string, error) {
   173  	var tokenizers []string
   174  	var seen = make(map[string]bool)
   175  	var seenSortableTok bool
   176  
   177  	if typ == types.UidID || typ == types.DefaultID || typ == types.PasswordID {
   178  		return tokenizers, it.Item().Errorf("Indexing not allowed on predicate %s of type %s",
   179  			predicate, typ.Name())
   180  	}
   181  	if !it.Next() {
   182  		// Nothing to read.
   183  		return []string{}, it.Item().Errorf("Invalid ending.")
   184  	}
   185  	next := it.Item()
   186  	if next.Typ != itemLeftRound {
   187  		it.Prev() // Backup.
   188  		return []string{}, it.Item().Errorf("Require type of tokenizer for pred: %s for indexing.",
   189  			predicate)
   190  	}
   191  
   192  	expectArg := true
   193  	// Look for tokenizers.
   194  	for {
   195  		it.Next()
   196  		next = it.Item()
   197  		if next.Typ == itemRightRound {
   198  			break
   199  		}
   200  		if next.Typ == itemComma {
   201  			if expectArg {
   202  				return nil, next.Errorf("Expected a tokenizer but got comma")
   203  			}
   204  			expectArg = true
   205  			continue
   206  		}
   207  		if next.Typ != itemText {
   208  			return tokenizers, next.Errorf("Expected directive arg but got: %v", next.Val)
   209  		}
   210  		if !expectArg {
   211  			return tokenizers, next.Errorf("Expected a comma but got: %v", next)
   212  		}
   213  		// Look for custom tokenizer.
   214  		tokenizer, has := tok.GetTokenizer(strings.ToLower(next.Val))
   215  		if !has {
   216  			return tokenizers, next.Errorf("Invalid tokenizer %s", next.Val)
   217  		}
   218  		tokenizerType, ok := types.TypeForName(tokenizer.Type())
   219  		x.AssertTrue(ok) // Type is validated during tokenizer loading.
   220  		if tokenizerType != typ {
   221  			return tokenizers,
   222  				next.Errorf("Tokenizer: %s isn't valid for predicate: %s of type: %s",
   223  					tokenizer.Name(), predicate, typ.Name())
   224  		}
   225  		if _, found := seen[tokenizer.Name()]; found {
   226  			return tokenizers, next.Errorf("Duplicate tokenizers defined for pred %v",
   227  				predicate)
   228  		}
   229  		if tokenizer.IsSortable() {
   230  			if seenSortableTok {
   231  				return nil, next.Errorf("More than one sortable index encountered for: %v",
   232  					predicate)
   233  			}
   234  			seenSortableTok = true
   235  		}
   236  		tokenizers = append(tokenizers, tokenizer.Name())
   237  		seen[tokenizer.Name()] = true
   238  		expectArg = false
   239  	}
   240  	return tokenizers, nil
   241  }
   242  
   243  // resolveTokenizers resolves default tokenizers and verifies tokenizers definitions.
   244  func resolveTokenizers(updates []*pb.SchemaUpdate) error {
   245  	for _, schema := range updates {
   246  		typ := types.TypeID(schema.ValueType)
   247  
   248  		if (typ == types.UidID || typ == types.DefaultID || typ == types.PasswordID) &&
   249  			schema.Directive == pb.SchemaUpdate_INDEX {
   250  			return errors.Errorf("Indexing not allowed on predicate %s of type %s",
   251  				schema.Predicate, typ.Name())
   252  		}
   253  
   254  		if typ == types.UidID {
   255  			continue
   256  		}
   257  
   258  		if len(schema.Tokenizer) == 0 && schema.Directive == pb.SchemaUpdate_INDEX {
   259  			return errors.Errorf("Require type of tokenizer for pred: %s of type: %s for indexing.",
   260  				schema.Predicate, typ.Name())
   261  		} else if len(schema.Tokenizer) > 0 && schema.Directive != pb.SchemaUpdate_INDEX {
   262  			return errors.Errorf("Tokenizers present without indexing on attr %s", schema.Predicate)
   263  		}
   264  		// check for valid tokeniser types and duplicates
   265  		var seen = make(map[string]bool)
   266  		var seenSortableTok bool
   267  		for _, t := range schema.Tokenizer {
   268  			tokenizer, has := tok.GetTokenizer(t)
   269  			if !has {
   270  				return errors.Errorf("Invalid tokenizer %s", t)
   271  			}
   272  			tokenizerType, ok := types.TypeForName(tokenizer.Type())
   273  			x.AssertTrue(ok) // Type is validated during tokenizer loading.
   274  			if tokenizerType != typ {
   275  				return errors.Errorf("Tokenizer: %s isn't valid for predicate: %s of type: %s",
   276  					tokenizer.Name(), schema.Predicate, typ.Name())
   277  			}
   278  			if _, ok := seen[tokenizer.Name()]; !ok {
   279  				seen[tokenizer.Name()] = true
   280  			} else {
   281  				return errors.Errorf("Duplicate tokenizers present for attr %s", schema.Predicate)
   282  			}
   283  			if tokenizer.IsSortable() {
   284  				if seenSortableTok {
   285  					return errors.Errorf("More than one sortable index encountered for: %v",
   286  						schema.Predicate)
   287  				}
   288  				seenSortableTok = true
   289  			}
   290  		}
   291  	}
   292  	return nil
   293  }
   294  
// parseTypeDeclaration parses a type declaration of the form
//
//	type Name {
//		field: fieldType
//		...
//	}
//
// The iterator must be positioned on the "type" keyword. It returns a
// TypeUpdate containing one entry per parsed field, or an error on the
// first malformed token.
func parseTypeDeclaration(it *lex.ItemIterator) (*pb.TypeUpdate, error) {
	// Iterator is currently on the token corresponding to the keyword type.
	if it.Item().Typ != itemText || it.Item().Val != "type" {
		return nil, it.Item().Errorf("Expected type keyword. Got %v", it.Item().Val)
	}

	it.Next()
	if it.Item().Typ != itemText {
		return nil, it.Item().Errorf("Expected type name. Got %v", it.Item().Val)
	}
	typeUpdate := &pb.TypeUpdate{TypeName: it.Item().Val}

	it.Next()
	if it.Item().Typ != itemLeftCurl {
		return nil, it.Item().Errorf("Expected {. Got %v", it.Item().Val)
	}

	var fields []*pb.SchemaUpdate
	for it.Next() {
		item := it.Item()

		switch item.Typ {
		case itemRightCurl:
			// End of the declaration: the closing brace must be followed
			// by a newline.
			it.Next()
			if it.Item().Typ != itemNewLine {
				return nil, it.Item().Errorf("Expected new line after type declaration. Got %v",
					it.Item().Val)
			}

			typeUpdate.Fields = fields
			return typeUpdate, nil
		case itemText:
			// A field name starts a field declaration.
			field, err := parseTypeField(it)
			if err != nil {
				return nil, err
			}
			fields = append(fields, field)
		case itemNewLine:
			// Ignore empty lines.
		default:
			// This also covers EOF inside an unterminated declaration,
			// since lex.ItemEOF matches none of the cases above.
			return nil, it.Item().Errorf("Unexpected token. Got %v", it.Item().Val)
		}
	}
	// Unreachable in practice: the lexer always yields EOF, which is
	// rejected by the default case before it.Next() can return false.
	return nil, errors.Errorf("Shouldn't reach here.")
}
   340  
// parseTypeField parses one field line inside a type declaration:
//
//	name: fieldType[!]
//	name: [fieldType][!]    (list form, with optional inner/outer '!')
//
// The iterator must be positioned on the field name. Scalar type names map
// onto ValueType; anything else is treated as an object reference and
// recorded in ObjectTypeName. '!' marks the field (or list) non-nullable.
func parseTypeField(it *lex.ItemIterator) (*pb.SchemaUpdate, error) {
	field := &pb.SchemaUpdate{Predicate: it.Item().Val}
	var list bool

	it.Next()
	if it.Item().Typ != itemColon {
		return nil, it.Item().Errorf("Missing colon in type declaration. Got %v", it.Item().Val)
	}

	it.Next()
	// An opening square bracket makes this a list field; remember it so we
	// can demand the matching close bracket after the type name.
	if it.Item().Typ == itemLeftSquare {
		list = true
		it.Next()
	}

	if it.Item().Typ != itemText {
		return nil, it.Item().Errorf("Missing field type in type declaration. Got %v",
			it.Item().Val)
	}
	field.ValueType = getType(it.Item().Val)
	// getType returns pb.Posting_OBJECT for any non-scalar name, in which
	// case the raw name is kept as the referenced type.
	if field.ValueType == pb.Posting_OBJECT {
		field.ObjectTypeName = it.Item().Val
	}

	it.Next()
	// Optional '!' directly after the type marks the element non-nullable.
	if it.Item().Typ == itemExclamationMark {
		field.NonNullable = true
		it.Next()
	}

	if list {
		if it.Item().Typ != itemRightSquare {
			return nil, it.Item().Errorf("Expected matching square bracket. Got %v", it.Item().Val)
		}
		field.List = true
		it.Next()

		// Optional '!' after the closing bracket marks the list itself
		// non-nullable.
		if it.Item().Typ == itemExclamationMark {
			field.NonNullableList = true
			it.Next()
		}
	}

	if it.Item().Typ != itemNewLine {
		return nil, it.Item().Errorf("Expected new line after field declaration. Got %v", it.Item().Val)
	}

	return field, nil
}
   390  
   391  func getType(typeName string) pb.Posting_ValType {
   392  	typ, ok := types.TypeForName(strings.ToLower(typeName))
   393  	if ok {
   394  		return pb.Posting_ValType(typ)
   395  	}
   396  
   397  	return pb.Posting_OBJECT
   398  }
   399  
// ParsedSchema represents the parsed schema and type updates.
type ParsedSchema struct {
	Preds []*pb.SchemaUpdate // predicate definitions (one per schema line)
	Types []*pb.TypeUpdate   // type declarations (one per "type { ... }" block)
}
   405  
   406  func isTypeDeclaration(item lex.Item, it *lex.ItemIterator) bool {
   407  	if item.Val != "type" {
   408  		return false
   409  	}
   410  
   411  	nextItems, err := it.Peek(2)
   412  	switch {
   413  	case err != nil || len(nextItems) != 2:
   414  		return false
   415  
   416  	case nextItems[0].Typ != itemText:
   417  		return false
   418  
   419  	case nextItems[1].Typ != itemLeftCurl:
   420  		return false
   421  	}
   422  
   423  	return true
   424  }
   425  
   426  // Parse parses a schema string and returns the schema representation for it.
   427  func Parse(s string) (*ParsedSchema, error) {
   428  	var result ParsedSchema
   429  
   430  	var l lex.Lexer
   431  	l.Reset(s)
   432  	l.Run(lexText)
   433  	if err := l.ValidateResult(); err != nil {
   434  		return nil, err
   435  	}
   436  	it := l.NewIterator()
   437  	for it.Next() {
   438  		item := it.Item()
   439  		switch item.Typ {
   440  		case lex.ItemEOF:
   441  			if err := resolveTokenizers(result.Preds); err != nil {
   442  				return nil, errors.Wrapf(err, "failed to enrich schema")
   443  			}
   444  			return &result, nil
   445  
   446  		case itemText:
   447  			if isTypeDeclaration(item, it) {
   448  				typeUpdate, err := parseTypeDeclaration(it)
   449  				if err != nil {
   450  					return nil, err
   451  				}
   452  				result.Types = append(result.Types, typeUpdate)
   453  				continue
   454  			}
   455  
   456  			schema, err := parseScalarPair(it, item.Val)
   457  			if err != nil {
   458  				return nil, err
   459  			}
   460  			result.Preds = append(result.Preds, schema)
   461  		case itemNewLine:
   462  			// pass empty line
   463  
   464  		default:
   465  			return nil, it.Item().Errorf("Unexpected token: %v while parsing schema", item)
   466  		}
   467  	}
   468  	return nil, errors.Errorf("Shouldn't reach here")
   469  }