github.com/maps90/godog@v0.7.5-0.20170923143419-0093943021d4/gherkin/matcher.go (about) 1 package gherkin 2 3 import ( 4 "regexp" 5 "strings" 6 "unicode/utf8" 7 ) 8 9 const ( 10 DEFAULT_DIALECT = "en" 11 COMMENT_PREFIX = "#" 12 TAG_PREFIX = "@" 13 TITLE_KEYWORD_SEPARATOR = ":" 14 TABLE_CELL_SEPARATOR = '|' 15 ESCAPE_CHAR = '\\' 16 ESCAPED_NEWLINE = 'n' 17 DOCSTRING_SEPARATOR = "\"\"\"" 18 DOCSTRING_ALTERNATIVE_SEPARATOR = "```" 19 ) 20 21 type matcher struct { 22 gdp GherkinDialectProvider 23 default_lang string 24 lang string 25 dialect *GherkinDialect 26 activeDocStringSeparator string 27 indentToRemove int 28 languagePattern *regexp.Regexp 29 } 30 31 func NewMatcher(gdp GherkinDialectProvider) Matcher { 32 return &matcher{ 33 gdp: gdp, 34 default_lang: DEFAULT_DIALECT, 35 lang: DEFAULT_DIALECT, 36 dialect: gdp.GetDialect(DEFAULT_DIALECT), 37 languagePattern: regexp.MustCompile("^\\s*#\\s*language\\s*:\\s*([a-zA-Z\\-_]+)\\s*$"), 38 } 39 } 40 41 func NewLanguageMatcher(gdp GherkinDialectProvider, language string) Matcher { 42 return &matcher{ 43 gdp: gdp, 44 default_lang: language, 45 lang: language, 46 dialect: gdp.GetDialect(language), 47 languagePattern: regexp.MustCompile("^\\s*#\\s*language\\s*:\\s*([a-zA-Z\\-_]+)\\s*$"), 48 } 49 } 50 51 func (m *matcher) Reset() { 52 m.indentToRemove = 0 53 m.activeDocStringSeparator = "" 54 if m.lang != "en" { 55 m.dialect = m.gdp.GetDialect(m.default_lang) 56 m.lang = "en" 57 } 58 } 59 60 func (m *matcher) newTokenAtLocation(line, index int) (token *Token) { 61 column := index + 1 62 token = new(Token) 63 token.GherkinDialect = m.lang 64 token.Location = &Location{line, column} 65 return 66 } 67 68 func (m *matcher) MatchEOF(line *Line) (ok bool, token *Token, err error) { 69 if line.IsEof() { 70 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 71 token.Type = TokenType_EOF 72 } 73 return 74 } 75 76 func (m *matcher) MatchEmpty(line *Line) (ok bool, token *Token, err error) { 77 if line.IsEmpty() { 78 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 79 token.Type = TokenType_Empty 80 } 81 return 82 } 83 84 func (m *matcher) MatchComment(line *Line) (ok bool, token *Token, err error) { 85 if line.StartsWith(COMMENT_PREFIX) { 86 token, ok = m.newTokenAtLocation(line.LineNumber, 0), true 87 token.Type = TokenType_Comment 88 token.Text = line.LineText 89 } 90 return 91 } 92 93 func (m *matcher) MatchTagLine(line *Line) (ok bool, token *Token, err error) { 94 if line.StartsWith(TAG_PREFIX) { 95 var tags []*LineSpan 96 var column = line.Indent() 97 splits := strings.Split(line.TrimmedLineText, TAG_PREFIX) 98 for i := range splits { 99 txt := strings.Trim(splits[i], " ") 100 if txt != "" { 101 tags = append(tags, &LineSpan{column, TAG_PREFIX + txt}) 102 } 103 column = column + len(splits[i]) + 1 104 } 105 106 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 107 token.Type = TokenType_TagLine 108 token.Items = tags 109 } 110 return 111 } 112 113 func (m *matcher) matchTitleLine(line *Line, tokenType TokenType, keywords []string) (ok bool, token *Token, err error) { 114 for i := range keywords { 115 keyword := keywords[i] 116 if line.StartsWith(keyword + TITLE_KEYWORD_SEPARATOR) { 117 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 118 token.Type = tokenType 119 token.Keyword = keyword 120 token.Text = strings.Trim(line.TrimmedLineText[len(keyword)+1:], " ") 121 return 122 } 123 } 124 return 125 } 126 127 func (m *matcher) MatchFeatureLine(line *Line) (ok bool, token *Token, err error) { 128 return m.matchTitleLine(line, TokenType_FeatureLine, m.dialect.FeatureKeywords()) 129 } 130 func (m *matcher) MatchBackgroundLine(line *Line) (ok bool, token *Token, err error) { 131 return m.matchTitleLine(line, TokenType_BackgroundLine, m.dialect.BackgroundKeywords()) 132 } 133 func (m *matcher) MatchScenarioLine(line *Line) (ok bool, token *Token, err error) { 134 return m.matchTitleLine(line, TokenType_ScenarioLine, m.dialect.ScenarioKeywords()) 135 } 136 func (m *matcher) MatchScenarioOutlineLine(line *Line) (ok bool, token *Token, err error) { 137 return m.matchTitleLine(line, TokenType_ScenarioOutlineLine, m.dialect.ScenarioOutlineKeywords()) 138 } 139 func (m *matcher) MatchExamplesLine(line *Line) (ok bool, token *Token, err error) { 140 return m.matchTitleLine(line, TokenType_ExamplesLine, m.dialect.ExamplesKeywords()) 141 } 142 func (m *matcher) MatchStepLine(line *Line) (ok bool, token *Token, err error) { 143 keywords := m.dialect.StepKeywords() 144 for i := range keywords { 145 keyword := keywords[i] 146 if line.StartsWith(keyword) { 147 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 148 token.Type = TokenType_StepLine 149 token.Keyword = keyword 150 token.Text = strings.Trim(line.TrimmedLineText[len(keyword):], " ") 151 return 152 } 153 } 154 return 155 } 156 157 func (m *matcher) MatchDocStringSeparator(line *Line) (ok bool, token *Token, err error) { 158 if m.activeDocStringSeparator != "" { 159 if line.StartsWith(m.activeDocStringSeparator) { 160 // close 161 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 162 token.Type = TokenType_DocStringSeparator 163 164 m.indentToRemove = 0 165 m.activeDocStringSeparator = "" 166 } 167 return 168 } 169 if line.StartsWith(DOCSTRING_SEPARATOR) { 170 m.activeDocStringSeparator = DOCSTRING_SEPARATOR 171 } else if line.StartsWith(DOCSTRING_ALTERNATIVE_SEPARATOR) { 172 m.activeDocStringSeparator = DOCSTRING_ALTERNATIVE_SEPARATOR 173 } 174 if m.activeDocStringSeparator != "" { 175 // open 176 contentType := line.TrimmedLineText[len(m.activeDocStringSeparator):] 177 m.indentToRemove = line.Indent() 178 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 179 token.Type = TokenType_DocStringSeparator 180 token.Text = contentType 181 } 182 return 183 } 184 185 func (m *matcher) MatchTableRow(line *Line) (ok bool, token *Token, err error) { 186 var firstChar, firstPos = utf8.DecodeRuneInString(line.TrimmedLineText) 187 if firstChar == TABLE_CELL_SEPARATOR { 188 var cells []*LineSpan 189 var cell []rune 190 var startCol = line.Indent() + 2 // column where the current cell started 191 // start after the first separator, it's not included in the cell 192 for i, w, col := firstPos, 0, startCol; i < len(line.TrimmedLineText); i += w { 193 var char rune 194 char, w = utf8.DecodeRuneInString(line.TrimmedLineText[i:]) 195 if char == TABLE_CELL_SEPARATOR { 196 // append current cell 197 txt := string(cell) 198 txtTrimmed := strings.TrimLeft(txt, " ") 199 ind := len(txt) - len(txtTrimmed) 200 cells = append(cells, &LineSpan{startCol + ind, strings.TrimRight(txtTrimmed, " ")}) 201 // start building next 202 cell = make([]rune, 0) 203 startCol = col + 1 204 } else if char == ESCAPE_CHAR { 205 // skip this character but count the column 206 i += w 207 col++ 208 char, w = utf8.DecodeRuneInString(line.TrimmedLineText[i:]) 209 if char == ESCAPED_NEWLINE { 210 cell = append(cell, '\n') 211 } else { 212 if char != TABLE_CELL_SEPARATOR && char != ESCAPE_CHAR { 213 cell = append(cell, ESCAPE_CHAR) 214 } 215 cell = append(cell, char) 216 } 217 } else { 218 cell = append(cell, char) 219 } 220 col++ 221 } 222 223 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 224 token.Type = TokenType_TableRow 225 token.Items = cells 226 } 227 return 228 } 229 230 func (m *matcher) MatchLanguage(line *Line) (ok bool, token *Token, err error) { 231 matches := m.languagePattern.FindStringSubmatch(line.TrimmedLineText) 232 if len(matches) > 0 { 233 lang := matches[1] 234 token, ok = m.newTokenAtLocation(line.LineNumber, line.Indent()), true 235 token.Type = TokenType_Language 236 token.Text = lang 237 238 dialect := m.gdp.GetDialect(lang) 239 if dialect == nil { 240 err = &parseError{"Language not supported: " + lang, token.Location} 241 } else { 242 m.lang = lang 243 m.dialect = dialect 244 } 245 } 246 return 247 } 248 249 func (m *matcher) MatchOther(line *Line) (ok bool, token *Token, err error) { 250 token, ok = m.newTokenAtLocation(line.LineNumber, 0), true 251 token.Type = TokenType_Other 252 253 element := line.LineText 254 txt := strings.TrimLeft(element, " ") 255 256 if len(element)-len(txt) > m.indentToRemove { 257 token.Text = m.unescapeDocString(element[m.indentToRemove:]) 258 } else { 259 token.Text = m.unescapeDocString(txt) 260 } 261 return 262 } 263 264 func (m *matcher) unescapeDocString(text string) string { 265 if m.activeDocStringSeparator != "" { 266 return strings.Replace(text, "\\\"\\\"\\\"", "\"\"\"", -1) 267 } else { 268 return text 269 } 270 }