github.com/maps90/godog@v0.7.5-0.20170923143419-0093943021d4/gherkin/matcher.go (about)

     1  package gherkin
     2  
     3  import (
     4  	"regexp"
     5  	"strings"
     6  	"unicode/utf8"
     7  )
     8  
     9  const (
    10  	DEFAULT_DIALECT                 = "en"
    11  	COMMENT_PREFIX                  = "#"
    12  	TAG_PREFIX                      = "@"
    13  	TITLE_KEYWORD_SEPARATOR         = ":"
    14  	TABLE_CELL_SEPARATOR            = '|'
    15  	ESCAPE_CHAR                     = '\\'
    16  	ESCAPED_NEWLINE                 = 'n'
    17  	DOCSTRING_SEPARATOR             = "\"\"\""
    18  	DOCSTRING_ALTERNATIVE_SEPARATOR = "```"
    19  )
    20  
    21  type matcher struct {
    22  	gdp                      GherkinDialectProvider
    23  	default_lang             string
    24  	lang                     string
    25  	dialect                  *GherkinDialect
    26  	activeDocStringSeparator string
    27  	indentToRemove           int
    28  	languagePattern          *regexp.Regexp
    29  }
    30  
    31  func NewMatcher(gdp GherkinDialectProvider) Matcher {
    32  	return &matcher{
    33  		gdp:             gdp,
    34  		default_lang:    DEFAULT_DIALECT,
    35  		lang:            DEFAULT_DIALECT,
    36  		dialect:         gdp.GetDialect(DEFAULT_DIALECT),
    37  		languagePattern: regexp.MustCompile("^\\s*#\\s*language\\s*:\\s*([a-zA-Z\\-_]+)\\s*$"),
    38  	}
    39  }
    40  
    41  func NewLanguageMatcher(gdp GherkinDialectProvider, language string) Matcher {
    42  	return &matcher{
    43  		gdp:             gdp,
    44  		default_lang:    language,
    45  		lang:            language,
    46  		dialect:         gdp.GetDialect(language),
    47  		languagePattern: regexp.MustCompile("^\\s*#\\s*language\\s*:\\s*([a-zA-Z\\-_]+)\\s*$"),
    48  	}
    49  }
    50  
    51  func (m *matcher) Reset() {
    52  	m.indentToRemove = 0
    53  	m.activeDocStringSeparator = ""
    54  	if m.lang != "en" {
    55  		m.dialect = m.gdp.GetDialect(m.default_lang)
    56  		m.lang = "en"
    57  	}
    58  }
    59  
    60  func (m *matcher) newTokenAtLocation(line, index int) (token *Token) {
    61  	column := index + 1
    62  	token = new(Token)
    63  	token.GherkinDialect = m.lang
    64  	token.Location = &Location{line, column}
    65  	return
    66  }
    67  
    68  func (m *matcher) MatchEOF(line *Line) (ok bool, token *Token, err error) {
    69  	if line.IsEof() {
    70  		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
    71  		token.Type = TokenType_EOF
    72  	}
    73  	return
    74  }
    75  
    76  func (m *matcher) MatchEmpty(line *Line) (ok bool, token *Token, err error) {
    77  	if line.IsEmpty() {
    78  		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
    79  		token.Type = TokenType_Empty
    80  	}
    81  	return
    82  }
    83  
    84  func (m *matcher) MatchComment(line *Line) (ok bool, token *Token, err error) {
    85  	if line.StartsWith(COMMENT_PREFIX) {
    86  		token, ok = m.newTokenAtLocation(line.LineNumber, 0), true
    87  		token.Type = TokenType_Comment
    88  		token.Text = line.LineText
    89  	}
    90  	return
    91  }
    92  
    93  func (m *matcher) MatchTagLine(line *Line) (ok bool, token *Token, err error) {
    94  	if line.StartsWith(TAG_PREFIX) {
    95  		var tags []*LineSpan
    96  		var column = line.Indent()
    97  		splits := strings.Split(line.TrimmedLineText, TAG_PREFIX)
    98  		for i := range splits {
    99  			txt := strings.Trim(splits[i], " ")
   100  			if txt != "" {
   101  				tags = append(tags, &LineSpan{column, TAG_PREFIX + txt})
   102  			}
   103  			column = column + len(splits[i]) + 1
   104  		}
   105  
   106  		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   107  		token.Type = TokenType_TagLine
   108  		token.Items = tags
   109  	}
   110  	return
   111  }
   112  
   113  func (m *matcher) matchTitleLine(line *Line, tokenType TokenType, keywords []string) (ok bool, token *Token, err error) {
   114  	for i := range keywords {
   115  		keyword := keywords[i]
   116  		if line.StartsWith(keyword + TITLE_KEYWORD_SEPARATOR) {
   117  			token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   118  			token.Type = tokenType
   119  			token.Keyword = keyword
   120  			token.Text = strings.Trim(line.TrimmedLineText[len(keyword)+1:], " ")
   121  			return
   122  		}
   123  	}
   124  	return
   125  }
   126  
   127  func (m *matcher) MatchFeatureLine(line *Line) (ok bool, token *Token, err error) {
   128  	return m.matchTitleLine(line, TokenType_FeatureLine, m.dialect.FeatureKeywords())
   129  }
   130  func (m *matcher) MatchBackgroundLine(line *Line) (ok bool, token *Token, err error) {
   131  	return m.matchTitleLine(line, TokenType_BackgroundLine, m.dialect.BackgroundKeywords())
   132  }
   133  func (m *matcher) MatchScenarioLine(line *Line) (ok bool, token *Token, err error) {
   134  	return m.matchTitleLine(line, TokenType_ScenarioLine, m.dialect.ScenarioKeywords())
   135  }
   136  func (m *matcher) MatchScenarioOutlineLine(line *Line) (ok bool, token *Token, err error) {
   137  	return m.matchTitleLine(line, TokenType_ScenarioOutlineLine, m.dialect.ScenarioOutlineKeywords())
   138  }
   139  func (m *matcher) MatchExamplesLine(line *Line) (ok bool, token *Token, err error) {
   140  	return m.matchTitleLine(line, TokenType_ExamplesLine, m.dialect.ExamplesKeywords())
   141  }
   142  func (m *matcher) MatchStepLine(line *Line) (ok bool, token *Token, err error) {
   143  	keywords := m.dialect.StepKeywords()
   144  	for i := range keywords {
   145  		keyword := keywords[i]
   146  		if line.StartsWith(keyword) {
   147  			token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   148  			token.Type = TokenType_StepLine
   149  			token.Keyword = keyword
   150  			token.Text = strings.Trim(line.TrimmedLineText[len(keyword):], " ")
   151  			return
   152  		}
   153  	}
   154  	return
   155  }
   156  
   157  func (m *matcher) MatchDocStringSeparator(line *Line) (ok bool, token *Token, err error) {
   158  	if m.activeDocStringSeparator != "" {
   159  		if line.StartsWith(m.activeDocStringSeparator) {
   160  			// close
   161  			token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   162  			token.Type = TokenType_DocStringSeparator
   163  
   164  			m.indentToRemove = 0
   165  			m.activeDocStringSeparator = ""
   166  		}
   167  		return
   168  	}
   169  	if line.StartsWith(DOCSTRING_SEPARATOR) {
   170  		m.activeDocStringSeparator = DOCSTRING_SEPARATOR
   171  	} else if line.StartsWith(DOCSTRING_ALTERNATIVE_SEPARATOR) {
   172  		m.activeDocStringSeparator = DOCSTRING_ALTERNATIVE_SEPARATOR
   173  	}
   174  	if m.activeDocStringSeparator != "" {
   175  		// open
   176  		contentType := line.TrimmedLineText[len(m.activeDocStringSeparator):]
   177  		m.indentToRemove = line.Indent()
   178  		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   179  		token.Type = TokenType_DocStringSeparator
   180  		token.Text = contentType
   181  	}
   182  	return
   183  }
   184  
   185  func (m *matcher) MatchTableRow(line *Line) (ok bool, token *Token, err error) {
   186  	var firstChar, firstPos = utf8.DecodeRuneInString(line.TrimmedLineText)
   187  	if firstChar == TABLE_CELL_SEPARATOR {
   188  		var cells []*LineSpan
   189  		var cell []rune
   190  		var startCol = line.Indent() + 2 // column where the current cell started
   191  		// start after the first separator, it's not included in the cell
   192  		for i, w, col := firstPos, 0, startCol; i < len(line.TrimmedLineText); i += w {
   193  			var char rune
   194  			char, w = utf8.DecodeRuneInString(line.TrimmedLineText[i:])
   195  			if char == TABLE_CELL_SEPARATOR {
   196  				// append current cell
   197  				txt := string(cell)
   198  				txtTrimmed := strings.TrimLeft(txt, " ")
   199  				ind := len(txt) - len(txtTrimmed)
   200  				cells = append(cells, &LineSpan{startCol + ind, strings.TrimRight(txtTrimmed, " ")})
   201  				// start building next
   202  				cell = make([]rune, 0)
   203  				startCol = col + 1
   204  			} else if char == ESCAPE_CHAR {
   205  				// skip this character but count the column
   206  				i += w
   207  				col++
   208  				char, w = utf8.DecodeRuneInString(line.TrimmedLineText[i:])
   209  				if char == ESCAPED_NEWLINE {
   210  					cell = append(cell, '\n')
   211  				} else {
   212  					if char != TABLE_CELL_SEPARATOR && char != ESCAPE_CHAR {
   213  						cell = append(cell, ESCAPE_CHAR)
   214  					}
   215  					cell = append(cell, char)
   216  				}
   217  			} else {
   218  				cell = append(cell, char)
   219  			}
   220  			col++
   221  		}
   222  
   223  		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   224  		token.Type = TokenType_TableRow
   225  		token.Items = cells
   226  	}
   227  	return
   228  }
   229  
   230  func (m *matcher) MatchLanguage(line *Line) (ok bool, token *Token, err error) {
   231  	matches := m.languagePattern.FindStringSubmatch(line.TrimmedLineText)
   232  	if len(matches) > 0 {
   233  		lang := matches[1]
   234  		token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true
   235  		token.Type = TokenType_Language
   236  		token.Text = lang
   237  
   238  		dialect := m.gdp.GetDialect(lang)
   239  		if dialect == nil {
   240  			err = &parseError{"Language not supported: " + lang, token.Location}
   241  		} else {
   242  			m.lang = lang
   243  			m.dialect = dialect
   244  		}
   245  	}
   246  	return
   247  }
   248  
   249  func (m *matcher) MatchOther(line *Line) (ok bool, token *Token, err error) {
   250  	token, ok = m.newTokenAtLocation(line.LineNumber, 0), true
   251  	token.Type = TokenType_Other
   252  
   253  	element := line.LineText
   254  	txt := strings.TrimLeft(element, " ")
   255  
   256  	if len(element)-len(txt) > m.indentToRemove {
   257  		token.Text = m.unescapeDocString(element[m.indentToRemove:])
   258  	} else {
   259  		token.Text = m.unescapeDocString(txt)
   260  	}
   261  	return
   262  }
   263  
   264  func (m *matcher) unescapeDocString(text string) string {
   265  	if m.activeDocStringSeparator != "" {
   266  		return strings.Replace(text, "\\\"\\\"\\\"", "\"\"\"", -1)
   267  	} else {
   268  		return text
   269  	}
   270  }