github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/gql/state.go (about)

     1  /*
     2   * Copyright 2015-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package gql is responsible for lexing and parsing a GraphQL query/mutation.
    18  package gql
    19  
    20  import (
    21  	"github.com/dgraph-io/dgraph/lex"
    22  )
    23  
// Punctuation runes that the lexer state functions in this file compare
// input characters against.
const (
	leftCurl    = '{'
	rightCurl   = '}'
	leftRound   = '('
	rightRound  = ')'
	leftSquare  = '['
	rightSquare = ']'
	period      = '.'
	comma       = ','
	slash       = '/'
	equal       = '='
	quote       = '"'
	at          = '@'
	colon       = ':'
	lsThan      = '<'
	star        = '*'
)
    41  
// Constants representing type of different graphql lexed items.
// Numbering starts at 5 and increases by one per constant, so the order of
// the entries below determines their values.
const (
	itemText                 lex.ItemType = 5 + iota // plain text
	itemLeftCurl                                     // left curly bracket
	itemRightCurl                                    // right curly bracket
	itemEqual                                        // equals to symbol
	itemName                                         // [9] names
	itemOpType                                       // operation type
	itemLeftRound                                    // left round bracket
	itemRightRound                                   // right round bracket
	itemColon                                        // Colon
	itemAt                                           // @
	itemPeriod                                       // .
	itemDollar                                       // $
	itemRegex                                        // /
	itemMutationOp                                   // mutation operation (set, delete)
	itemMutationOpContent                            // mutation operation content
	itemUpsertBlock                                  // mutation upsert block
	itemUpsertBlockOp                                // upsert block op (query, mutate)
	itemUpsertBlockOpContent                         // upsert block operations' content
	itemLeftSquare                                   // left square bracket
	itemRightSquare                                  // right square bracket
	itemComma                                        // comma
	itemMathOp                                       // math or comparison operator
)
    67  
    68  // lexIdentifyBlock identifies whether it is an upsert block
    69  // If the block begins with "{" => mutation block
    70  // Else if the block begins with "upsert" => upsert block
    71  func lexIdentifyBlock(l *lex.Lexer) lex.StateFn {
    72  	l.Mode = lexIdentifyBlock
    73  	for {
    74  		switch r := l.Next(); {
    75  		case isSpace(r) || lex.IsEndOfLine(r):
    76  			l.Ignore()
    77  		case isNameBegin(r):
    78  			return lexNameBlock
    79  		case r == leftCurl:
    80  			l.Backup()
    81  			return lexInsideMutation
    82  		case r == '#':
    83  			return lexComment
    84  		case r == lex.EOF:
    85  			return l.Errorf("Invalid mutation block")
    86  		default:
    87  			return l.Errorf("Unexpected character while identifying mutation block: %#U", r)
    88  		}
    89  	}
    90  }
    91  
    92  // lexNameBlock lexes the blocks, for now, only upsert block
    93  func lexNameBlock(l *lex.Lexer) lex.StateFn {
    94  	// The caller already checked isNameBegin, and absorbed one rune.
    95  	l.AcceptRun(isNameSuffix)
    96  	switch word := l.Input[l.Start:l.Pos]; word {
    97  	case "upsert":
    98  		l.Emit(itemUpsertBlock)
    99  		return lexUpsertBlock
   100  	default:
   101  		return l.Errorf("Invalid block: [%s]", word)
   102  	}
   103  }
   104  
   105  // lexUpsertBlock lexes the upsert block
   106  func lexUpsertBlock(l *lex.Lexer) lex.StateFn {
   107  	l.Mode = lexUpsertBlock
   108  	for {
   109  		switch r := l.Next(); {
   110  		case r == rightCurl:
   111  			l.BlockDepth--
   112  			l.Emit(itemRightCurl)
   113  			if l.BlockDepth == 0 {
   114  				return lexTopLevel
   115  			}
   116  		case r == leftCurl:
   117  			l.BlockDepth++
   118  			l.Emit(itemLeftCurl)
   119  		case isSpace(r) || lex.IsEndOfLine(r):
   120  			l.Ignore()
   121  		case isNameBegin(r):
   122  			return lexNameUpsertOp
   123  		case r == '#':
   124  			return lexComment
   125  		case r == lex.EOF:
   126  			return l.Errorf("Unclosed upsert block")
   127  		default:
   128  			return l.Errorf("Unrecognized character in upsert block: %#U", r)
   129  		}
   130  	}
   131  }
   132  
   133  // lexNameUpsertOp parses the operation names inside upsert block
   134  func lexNameUpsertOp(l *lex.Lexer) lex.StateFn {
   135  	// The caller already checked isNameBegin, and absorbed one rune.
   136  	l.AcceptRun(isNameSuffix)
   137  	word := l.Input[l.Start:l.Pos]
   138  	switch word {
   139  	case "query":
   140  		l.Emit(itemUpsertBlockOp)
   141  		return lexBlockContent
   142  	case "mutation":
   143  		l.Emit(itemUpsertBlockOp)
   144  		return lexInsideMutation
   145  	case "fragment":
   146  		l.Emit(itemUpsertBlockOp)
   147  		return lexBlockContent
   148  	default:
   149  		return l.Errorf("Invalid operation type: %s", word)
   150  	}
   151  }
   152  
// lexBlockContent lexes and absorbs the text inside a block (covered by braces).
// It delegates to lexContent with curly braces as the delimiters and resumes
// in lexUpsertBlock once the matching closing brace has been consumed.
func lexBlockContent(l *lex.Lexer) lex.StateFn {
	return lexContent(l, leftCurl, rightCurl, lexUpsertBlock)
}
   157  
   158  // lexIfContent lexes the whole of @if directive in a mutation block (covered by small brackets)
   159  func lexIfContent(l *lex.Lexer) lex.StateFn {
   160  	if r := l.Next(); r != at {
   161  		return l.Errorf("Expected [@], found; [%#U]", r)
   162  	}
   163  
   164  	l.AcceptRun(isNameSuffix)
   165  	word := l.Input[l.Start:l.Pos]
   166  	if word != "@if" {
   167  		return l.Errorf("Expected @if, found [%v]", word)
   168  	}
   169  
   170  	return lexContent(l, '(', ')', lexInsideMutation)
   171  }
   172  
   173  func lexContent(l *lex.Lexer, leftRune, rightRune rune, returnTo lex.StateFn) lex.StateFn {
   174  	depth := 0
   175  	for {
   176  		switch l.Next() {
   177  		case lex.EOF:
   178  			return l.Errorf("Matching brackets not found")
   179  		case quote:
   180  			if err := l.LexQuotedString(); err != nil {
   181  				return l.Errorf(err.Error())
   182  			}
   183  		case leftRune:
   184  			depth++
   185  		case rightRune:
   186  			depth--
   187  			if depth < 0 {
   188  				return l.Errorf("Unopened %c found", rightRune)
   189  			} else if depth == 0 {
   190  				l.Emit(itemUpsertBlockOpContent)
   191  				return returnTo
   192  			}
   193  		}
   194  	}
   195  
   196  }
   197  
   198  func lexInsideMutation(l *lex.Lexer) lex.StateFn {
   199  	l.Mode = lexInsideMutation
   200  	for {
   201  		switch r := l.Next(); {
   202  		case r == at:
   203  			l.Backup()
   204  			return lexIfContent
   205  		case r == rightCurl:
   206  			l.Depth--
   207  			l.Emit(itemRightCurl)
   208  			if l.Depth == 0 {
   209  				return lexTopLevel
   210  			}
   211  		case r == leftCurl:
   212  			l.Depth++
   213  			l.Emit(itemLeftCurl)
   214  			if l.Depth >= 2 {
   215  				return lexTextMutation
   216  			}
   217  		case isSpace(r) || lex.IsEndOfLine(r):
   218  			l.Ignore()
   219  		case isNameBegin(r):
   220  			return lexNameMutation
   221  		case r == '#':
   222  			return lexComment
   223  		case r == lex.EOF:
   224  			return l.Errorf("Unclosed mutation action")
   225  		default:
   226  			return l.Errorf("Unrecognized character inside mutation: %#U", r)
   227  		}
   228  	}
   229  }
   230  
   231  func lexInsideSchema(l *lex.Lexer) lex.StateFn {
   232  	l.Mode = lexInsideSchema
   233  	for {
   234  		switch r := l.Next(); {
   235  		case r == rightRound:
   236  			l.Emit(itemRightRound)
   237  		case r == leftRound:
   238  			l.Emit(itemLeftRound)
   239  		case r == rightCurl:
   240  			l.Depth--
   241  			l.Emit(itemRightCurl)
   242  			if l.Depth == 0 {
   243  				return lexTopLevel
   244  			}
   245  		case r == leftCurl:
   246  			l.Depth++
   247  			l.Emit(itemLeftCurl)
   248  		case r == leftSquare:
   249  			l.Emit(itemLeftSquare)
   250  		case r == rightSquare:
   251  			l.Emit(itemRightSquare)
   252  		case isSpace(r) || lex.IsEndOfLine(r):
   253  			l.Ignore()
   254  		case isNameBegin(r):
   255  			return lexArgName
   256  		case r == '#':
   257  			return lexComment
   258  		case r == colon:
   259  			l.Emit(itemColon)
   260  		case r == comma:
   261  			l.Emit(itemComma)
   262  		case r == lex.EOF:
   263  			return l.Errorf("Unclosed schema action")
   264  		default:
   265  			return l.Errorf("Unrecognized character inside schema: %#U", r)
   266  		}
   267  	}
   268  }
   269  
   270  func lexFuncOrArg(l *lex.Lexer) lex.StateFn {
   271  	l.Mode = lexFuncOrArg
   272  	var empty bool
   273  	for {
   274  		switch r := l.Next(); {
   275  		case r == at:
   276  			l.Emit(itemAt)
   277  			return lexDirectiveOrLangList
   278  		case isNameBegin(r) || isNumber(r):
   279  			return lexArgName
   280  		case r == slash:
   281  			// if argument starts with '/' it's a regex, otherwise it's a division
   282  			if empty {
   283  				return lexRegex(l)
   284  			}
   285  			fallthrough
   286  		case isMathOp(r):
   287  			l.Emit(itemMathOp)
   288  		case isInequalityOp(r):
   289  			if r == equal {
   290  				if !isInequalityOp(l.Peek()) {
   291  					l.Emit(itemEqual)
   292  					continue
   293  				}
   294  			}
   295  			if r == lsThan {
   296  				if !isSpace(l.Peek()) && l.Peek() != '=' {
   297  					// as long as its not '=' or ' '
   298  					return lexIRIRef
   299  				}
   300  			}
   301  			if isInequalityOp(l.Peek()) {
   302  				l.Next()
   303  			}
   304  			l.Emit(itemMathOp)
   305  		case r == leftRound:
   306  			l.Emit(itemLeftRound)
   307  			l.ArgDepth++
   308  		case r == rightRound:
   309  			if l.ArgDepth == 0 {
   310  				return l.Errorf("Unexpected right round bracket")
   311  			}
   312  			l.ArgDepth--
   313  			l.Emit(itemRightRound)
   314  			if empty {
   315  				return l.Errorf("Empty Argument")
   316  			}
   317  			if l.ArgDepth == 0 {
   318  				return lexQuery // Filter directive is done.
   319  			}
   320  		case r == lex.EOF:
   321  			return l.Errorf("Unclosed Brackets")
   322  		case isSpace(r) || lex.IsEndOfLine(r):
   323  			l.Ignore()
   324  		case r == comma:
   325  			if empty {
   326  				return l.Errorf("Consecutive commas not allowed.")
   327  			}
   328  			empty = true
   329  			l.Emit(itemComma)
   330  		case isDollar(r):
   331  			l.Emit(itemDollar)
   332  		case r == colon:
   333  			l.Emit(itemColon)
   334  		case r == quote:
   335  			{
   336  				empty = false
   337  				if err := l.LexQuotedString(); err != nil {
   338  					return l.Errorf(err.Error())
   339  				}
   340  				l.Emit(itemName)
   341  			}
   342  		case isEndLiteral(r):
   343  			{
   344  				empty = false
   345  				l.AcceptUntil(isEndLiteral) // This call will backup the ending ".
   346  				l.Next()                    // Consume the " .
   347  				l.Emit(itemName)
   348  			}
   349  		case r == leftSquare:
   350  			l.Emit(itemLeftSquare)
   351  		case r == rightSquare:
   352  			l.Emit(itemRightSquare)
   353  		case r == '#':
   354  			return lexComment
   355  		case r == '.':
   356  			l.Emit(itemPeriod)
   357  		default:
   358  			return l.Errorf("Unrecognized character inside a func: %#U", r)
   359  		}
   360  	}
   361  }
   362  
   363  func lexTopLevel(l *lex.Lexer) lex.StateFn {
   364  	// TODO(Aman): Find a way to identify different blocks in future. We only have
   365  	// Upsert block right now. BlockDepth tells us nesting of blocks. Currently, only
   366  	// the Upsert block has nested mutation/query/fragment blocks.
   367  	if l.BlockDepth != 0 {
   368  		return lexUpsertBlock
   369  	}
   370  
   371  	l.Mode = lexTopLevel
   372  Loop:
   373  	for {
   374  		switch r := l.Next(); {
   375  		case r == leftCurl:
   376  			l.Depth++ // one level down.
   377  			l.Emit(itemLeftCurl)
   378  			return lexQuery
   379  		case r == rightCurl:
   380  			return l.Errorf("Too many right curl")
   381  		case r == lex.EOF:
   382  			break Loop
   383  		case r == '#':
   384  			return lexComment
   385  		case r == leftRound:
   386  			l.Backup()
   387  			l.Emit(itemText)
   388  			l.Next()
   389  			l.Emit(itemLeftRound)
   390  			l.ArgDepth++
   391  			return lexQuery
   392  		case isSpace(r) || lex.IsEndOfLine(r):
   393  			l.Ignore()
   394  		case isNameBegin(r):
   395  			l.Backup()
   396  			return lexOperationType
   397  		}
   398  	}
   399  	if l.Pos > l.Start {
   400  		l.Emit(itemText)
   401  	}
   402  	l.Emit(lex.ItemEOF)
   403  	return nil
   404  }
   405  
   406  // lexQuery lexes the input string and calls other lex functions.
   407  func lexQuery(l *lex.Lexer) lex.StateFn {
   408  	l.Mode = lexQuery
   409  	for {
   410  		switch r := l.Next(); {
   411  		case r == period:
   412  			l.Emit(itemPeriod)
   413  		case r == rightCurl:
   414  			l.Depth--
   415  			l.Emit(itemRightCurl)
   416  			if l.Depth == 0 {
   417  				return lexTopLevel
   418  			}
   419  		case r == leftCurl:
   420  			l.Depth++
   421  			l.Emit(itemLeftCurl)
   422  		case r == lex.EOF:
   423  			return l.Errorf("Unclosed action")
   424  		case isSpace(r) || lex.IsEndOfLine(r):
   425  			l.Ignore()
   426  		case r == comma:
   427  			l.Emit(itemComma)
   428  		case isNameBegin(r):
   429  			return lexName
   430  		case r == '#':
   431  			return lexComment
   432  		case r == '-':
   433  			l.Emit(itemMathOp)
   434  		case r == leftRound:
   435  			l.Emit(itemLeftRound)
   436  			l.AcceptRun(isSpace)
   437  			l.Ignore()
   438  			l.ArgDepth++
   439  			return lexFuncOrArg
   440  		case r == colon:
   441  			l.Emit(itemColon)
   442  		case r == at:
   443  			l.Emit(itemAt)
   444  			return lexDirectiveOrLangList
   445  		case r == lsThan:
   446  			return lexIRIRef
   447  		default:
   448  			return l.Errorf("Unrecognized character in lexText: %#U", r)
   449  		}
   450  	}
   451  }
   452  
   453  func lexIRIRef(l *lex.Lexer) lex.StateFn {
   454  	if err := lex.IRIRef(l, itemName); err != nil {
   455  		return l.Errorf(err.Error())
   456  	}
   457  	return l.Mode
   458  }
   459  
   460  // lexDirectiveOrLangList is called right after we see a @.
   461  func lexDirectiveOrLangList(l *lex.Lexer) lex.StateFn {
   462  	r := l.Next()
   463  	// Check first character.
   464  	if !isNameBegin(r) && r != period && r != star {
   465  		return l.Errorf("Unrecognized character in lexDirective: %#U", r)
   466  	}
   467  	l.Backup()
   468  
   469  	for {
   470  		r := l.Next()
   471  		if r == period {
   472  			l.Emit(itemName)
   473  			return l.Mode
   474  		}
   475  		if isLangOrDirective(r) {
   476  			continue
   477  		}
   478  		l.Backup()
   479  		l.Emit(itemName)
   480  		break
   481  	}
   482  	return l.Mode
   483  }
   484  
// lexName consumes the remaining name-suffix runes of a name whose first rune
// the caller already read, emits the name as itemName, and resumes the state
// stored in l.Mode.
func lexName(l *lex.Lexer) lex.StateFn {
	l.AcceptRun(isNameSuffix)
	l.Emit(itemName)
	return l.Mode
}
   490  
   491  // lexComment lexes a comment text.
   492  func lexComment(l *lex.Lexer) lex.StateFn {
   493  	for {
   494  		r := l.Next()
   495  		if lex.IsEndOfLine(r) {
   496  			l.Ignore()
   497  			return l.Mode
   498  		}
   499  		if r == lex.EOF {
   500  			break
   501  		}
   502  	}
   503  	l.Ignore()
   504  	l.Emit(lex.ItemEOF)
   505  	return l.Mode
   506  }
   507  
   508  // lexNameMutation lexes the itemMutationOp, which could be set or delete.
   509  func lexNameMutation(l *lex.Lexer) lex.StateFn {
   510  	for {
   511  		// The caller already checked isNameBegin, and absorbed one rune.
   512  		r := l.Next()
   513  		if isNameSuffix(r) {
   514  			continue
   515  		}
   516  		l.Backup()
   517  		l.Emit(itemMutationOp)
   518  		break
   519  	}
   520  	return l.Mode
   521  }
   522  
   523  // lexTextMutation lexes and absorbs the text inside a mutation operation block.
   524  func lexTextMutation(l *lex.Lexer) lex.StateFn {
   525  	for {
   526  		r := l.Next()
   527  		if r == lex.EOF {
   528  			return l.Errorf("Unclosed mutation text")
   529  		}
   530  		if r == quote {
   531  			return lexMutationValue
   532  		}
   533  		if r == leftCurl {
   534  			return l.Errorf("Invalid character '{' inside mutation text")
   535  		}
   536  		if r != rightCurl {
   537  			// Absorb everything until we find '}'.
   538  			continue
   539  		}
   540  		l.Backup()
   541  		l.Emit(itemMutationOpContent)
   542  		break
   543  	}
   544  	return lexInsideMutation
   545  }
   546  
   547  // This function is used to absorb the object value.
   548  func lexMutationValue(l *lex.Lexer) lex.StateFn {
   549  LOOP:
   550  	for {
   551  		r := l.Next()
   552  		switch r {
   553  		case lex.EOF:
   554  			return l.Errorf("Unclosed mutation value")
   555  		case quote:
   556  			break LOOP
   557  		case '\\':
   558  			l.Next() // skip one.
   559  		}
   560  	}
   561  	return lexTextMutation
   562  }
   563  
   564  func lexRegex(l *lex.Lexer) lex.StateFn {
   565  LOOP:
   566  	for {
   567  		r := l.Next()
   568  		switch r {
   569  		case lex.EOF:
   570  			return l.Errorf("Unclosed regexp")
   571  		case '\\':
   572  			l.Next()
   573  		case '/':
   574  			break LOOP
   575  		}
   576  	}
   577  	l.AcceptRun(isRegexFlag)
   578  	l.Emit(itemRegex)
   579  	return l.Mode
   580  }
   581  
   582  // lexOperationType lexes a query or mutation or schema operation type.
   583  func lexOperationType(l *lex.Lexer) lex.StateFn {
   584  	l.AcceptRun(isNameSuffix)
   585  	// l.Pos would be index of the end of operation type + 1.
   586  	word := l.Input[l.Start:l.Pos]
   587  	if word == "mutation" {
   588  		l.Emit(itemOpType)
   589  		return lexInsideMutation
   590  	} else if word == "fragment" {
   591  		l.Emit(itemOpType)
   592  		return lexQuery
   593  	} else if word == "query" {
   594  		l.Emit(itemOpType)
   595  		return lexQuery
   596  	} else if word == "schema" {
   597  		l.Emit(itemOpType)
   598  		return lexInsideSchema
   599  	} else {
   600  		return l.Errorf("Invalid operation type: %s", word)
   601  	}
   602  }
   603  
// lexArgName lexes and emits the name part of an argument.
// The caller has already consumed the first rune; the rest of the run of
// name-suffix runes is absorbed, emitted as itemName, and lexing resumes in
// the state stored in l.Mode.
func lexArgName(l *lex.Lexer) lex.StateFn {
	l.AcceptRun(isNameSuffix)
	l.Emit(itemName)
	return l.Mode
}
   610  
   611  // isDollar returns true if the rune is a Dollar($).
   612  func isDollar(r rune) bool {
   613  	return r == '$' || r == '\u0024'
   614  }
   615  
   616  // isSpace returns true if the rune is a tab or space.
   617  func isSpace(r rune) bool {
   618  	return r == '\u0009' || r == '\u0020'
   619  }
   620  
   621  // isEndLiteral returns true if rune is quotation mark.
   622  func isEndLiteral(r rune) bool {
   623  	return r == '"' || r == '\u000d' || r == '\u000a'
   624  }
   625  
   626  func isLangOrDirective(r rune) bool {
   627  	if isNameBegin(r) {
   628  		return true
   629  	}
   630  	if r == '-' {
   631  		return true
   632  	}
   633  	if r >= '0' && r <= '9' {
   634  		return true
   635  	}
   636  	if r == '*' {
   637  		return true
   638  	}
   639  	return false
   640  }
   641  
   642  // isNameBegin returns true if the rune is an alphabet or an '_' or '~'.
   643  func isNameBegin(r rune) bool {
   644  	switch {
   645  	case r >= 'a' && r <= 'z':
   646  		return true
   647  	case r >= 'A' && r <= 'Z':
   648  		return true
   649  	case r == '_':
   650  		return true
   651  	case r == '~':
   652  		return true
   653  	default:
   654  		return false
   655  	}
   656  }
   657  
   658  func isMathOp(r rune) bool {
   659  	switch r {
   660  	case '+', '-', '*', '/', '%':
   661  		return true
   662  	default:
   663  		return false
   664  	}
   665  }
   666  
   667  func isInequalityOp(r rune) bool {
   668  	switch r {
   669  	case '<', '>', '=', '!':
   670  		return true
   671  	default:
   672  		return false
   673  	}
   674  }
   675  
   676  func isNumber(r rune) bool {
   677  	switch {
   678  	case (r >= '0' && r <= '9'):
   679  		return true
   680  	default:
   681  		return false
   682  	}
   683  }
   684  
   685  func isNameSuffix(r rune) bool {
   686  	if isMathOp(r) {
   687  		return false
   688  	}
   689  	if isNameBegin(r) || isNumber(r) {
   690  		return true
   691  	}
   692  	if r == '.' /*|| r == '!'*/ { // Use by freebase.
   693  		return true
   694  	}
   695  	return false
   696  }
   697  
   698  func isRegexFlag(r rune) bool {
   699  	switch {
   700  	case r >= 'a' && r <= 'z':
   701  		return true
   702  	case r >= 'A' && r <= 'Z':
   703  		return true
   704  	default:
   705  		return false
   706  	}
   707  }