// NOTE(review): in the original file these declarations follow the clause
// `package stp` and an import block listing "fmt" and "strings"; the code in
// this section only needs "strings".

var (
	// punctuationMarkMap maps every supported mark to its counterpart, in both
	// directions. Quote marks are their own counterpart.
	punctuationMarkMap = map[rune]rune{
		'(': ')', ')': '(',
		'{': '}', '}': '{',
		'[': ']', ']': '[',
		'"': '"', '\'': '\'', '`': '`',
	}

	PunctuationMarkLeftDoubleQuotes   rune = '"'
	PunctuationMarkRightDoubleQuotes  rune = '"'
	PunctuationMarkLeftInverseQuotes  rune = '`'
	PunctuationMarkRightInverseQuotes rune = '`'
	PunctuationMarkLeftSingleQuotes   rune = '\''
	PunctuationMarkRightSingleQuotes  rune = '\''
	PunctuationMarkLeftBracket        rune = '('
	PunctuationMarkRightBracket       rune = ')'
	PunctuationMarkLeftCurlyBracket   rune = '{'
	PunctuationMarkRightCurlyBracket  rune = '}'
	PunctuationMarkLeftSquareBracket  rune = '['
	PunctuationMarkRightSquareBracket rune = ']'
	PunctuationMarkPoint              rune = '.'
	ASCIISpace                        rune = ' '

	// InvalidScopePunctuationMarkMap lists the marks that open a region whose
	// content must be ignored while matching scopes (quoted text), keyed by
	// the opening mark with the closing mark as value.
	InvalidScopePunctuationMarkMap = map[rune]rune{
		PunctuationMarkLeftDoubleQuotes:  PunctuationMarkRightDoubleQuotes,
		PunctuationMarkLeftInverseQuotes: PunctuationMarkRightInverseQuotes,
		PunctuationMarkLeftSingleQuotes:  PunctuationMarkRightSingleQuotes,
	}

	// ScopePunctuationMarkList holds the identifiers of every bracket kind
	// that delimits a scope.
	ScopePunctuationMarkList = []int{
		PunctuationMarkBracket,
		PunctuationMarkCurlyBracket,
		PunctuationMarkSquareBracket,
	}
)

// GetAnotherPunctuationMark returns the counterpart of a paired punctuation
// mark (for example ')' for '('). Runes that are not in the pairing table
// yield a space (' ').
func GetAnotherPunctuationMark(r rune) rune {
	if counterpart, ok := punctuationMarkMap[r]; ok {
		return counterpart
	}
	return ' '
}

// GetLeftPunctuationMarkList returns a fresh slice with every opening mark:
// the three bracket kinds followed by the three quote kinds.
func GetLeftPunctuationMarkList() []rune {
	return []rune{'(', '{', '[', '"', '\'', '`'}
}

// IsSpaceRune reports whether r is a space, a line feed or a tab.
func IsSpaceRune(r rune) bool {
	return r == ' ' || r == '\n' || r == '\t'
}

// IsCharacter reports whether r is an ASCII digit, an ASCII letter or '_'.
func IsCharacter(r rune) bool {
	return ('0' <= r && r <= '9') || ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') || r == '_'
}

// CalculatePunctuationMarksContentLength returns the byte length of the
// content enclosed by a pair of punctuation marks, excluding the closing mark.
//
// contentAfterLeftPunctuationMark is the text that follows the opening mark
// (the opening mark itself is not part of it). leftPunctuationMark and
// rightPunctuationMark are the opening and closing marks; nested pairs are
// balanced. invalidScopePunctuationMarkMap maps the opening mark of a region
// to ignore (quoted text) to its closing mark; marks inside such a region are
// not counted. A nil map disables that handling.
//
// When the opening and closing marks are the same rune (quotes), the first
// occurrence closes the scope. When no closing mark is found the whole input
// is the content and len(contentAfterLeftPunctuationMark) is returned, so
// "opening index + 1 + result" lands past the end of the text and callers can
// detect the unterminated scope. The result is never negative.
func CalculatePunctuationMarksContentLength(contentAfterLeftPunctuationMark string, leftPunctuationMark, rightPunctuationMark rune, invalidScopePunctuationMarkMap map[rune]rune) int {
	depth := 1 // the opening mark that precedes the content is already open
	inIgnoredRegion := false
	var ignoredRegionEnd rune

	// Ranging over the string yields byte offsets directly, which stay
	// correct even for invalid UTF-8 input.
	for index, r := range contentAfterLeftPunctuationMark {
		if inIgnoredRegion {
			if r == ignoredRegionEnd {
				inIgnoredRegion = false
			}
			continue
		}

		// A quote-delimited scope cannot nest: its first occurrence closes
		// it. This has to win over the ignored-region check below, otherwise
		// the closing quote would be read as the start of a quoted region.
		if leftPunctuationMark == rightPunctuationMark && r == rightPunctuationMark {
			return index
		}

		if regionEnd, opensRegion := invalidScopePunctuationMarkMap[r]; opensRegion {
			inIgnoredRegion, ignoredRegionEnd = true, regionEnd
			continue
		}

		switch r {
		case leftPunctuationMark:
			depth++
		case rightPunctuationMark:
			depth--
			if depth == 0 {
				return index
			}
		}
	}

	// No matching closing mark.
	return len(contentAfterLeftPunctuationMark)
}

// GetScopeContentBetweenPunctuationMarks returns the content enclosed by the
// pair of marks whose opening mark sits at byte index scopeBeginIndex of
// content. Neither mark is part of the result. Regions listed in
// InvalidScopePunctuationMarkMap are ignored while matching.
//
// An index outside content yields "". When the scope is never closed, the
// text up to the end of content is returned.
func GetScopeContentBetweenPunctuationMarks(content string, scopeBeginIndex int, scopeBeginRune, scopeEndRune rune) string {
	contentBeginIndex := scopeBeginIndex + len(string(scopeBeginRune))
	if scopeBeginIndex < 0 || contentBeginIndex > len(content) {
		return ""
	}
	scopeContentLength := CalculatePunctuationMarksContentLength(
		content[contentBeginIndex:],
		scopeBeginRune, scopeEndRune,
		InvalidScopePunctuationMarkMap,
	)
	// Slicing a string works on byte offsets, which is what the length is.
	return content[contentBeginIndex : contentBeginIndex+scopeContentLength]
}

// PunctuationIndex holds the indexes of a pair of punctuation marks.
type PunctuationIndex struct {
	Left  int
	Right int
}

// PunctuationMarkInfo describes one punctuation mark and where it was found.
type PunctuationMarkInfo struct {
	PunctuationMark rune
	Index           int
}

// NewPunctuationContent is a node of the scope tree: the content enclosed by
// a pair of marks, the marks themselves and the scopes nested directly inside.
type NewPunctuationContent struct {
	Content                   string
	LeftPunctuationMark       *PunctuationMarkInfo
	RightPunctuationMark      *PunctuationMarkInfo
	SubPunctuationContentList []*NewPunctuationContent
}

// Identifiers of the punctuation mark kinds.
const (
	PunctuationMarkQuote         = 1
	PunctuationMarkBracket       = 2
	PunctuationMarkCurlyBracket  = 3
	PunctuationMarkSquareBracket = 4
)

// GetPunctuationMark returns the opening and closing runes for a mark kind.
// Every value other than PunctuationMarkCurlyBracket and
// PunctuationMarkSquareBracket (including PunctuationMarkQuote) yields the
// round brackets.
func GetPunctuationMark(punctuationMark int) (rune, rune) {
	switch punctuationMark {
	case PunctuationMarkCurlyBracket:
		return PunctuationMarkLeftCurlyBracket, PunctuationMarkRightCurlyBracket
	case PunctuationMarkSquareBracket:
		return PunctuationMarkLeftSquareBracket, PunctuationMarkRightSquareBracket
	default:
		return PunctuationMarkLeftBracket, PunctuationMarkRightBracket
	}
}

// TraitMultiPunctuationMarksContent builds the scope tree of content for the
// given mark kinds, descending at most maxDeep levels. The root node spans the
// whole content; its marks are the zero rune at indexes -1 and len(content).
func TraitMultiPunctuationMarksContent(content string, punctuationMarkList []int, maxDeep int) *NewPunctuationContent {
	leftPunctuationMarkList := make([]rune, 0, len(punctuationMarkList))
	for _, punctuationMark := range punctuationMarkList {
		leftPunctuationMark, _ := GetPunctuationMark(punctuationMark)
		leftPunctuationMarkList = append(leftPunctuationMarkList, leftPunctuationMark)
	}
	return RecursiveTraitMultiPunctuationMarksContent(content, &PunctuationMarkInfo{
		PunctuationMark: 0,
		Index:           -1,
	}, &PunctuationMarkInfo{
		PunctuationMark: 0,
		Index:           len(content),
	}, leftPunctuationMarkList, maxDeep, 0)
}

// RecursiveTraitMultiPunctuationMarksContent builds the scope tree node for
// content and, while deep != maxDeep, one child per top-level scope found in
// it.
//
// leftPunctuationMarkInfo and rightPunctuationMarkInfo are the marks that
// enclose content and must not be nil; the indexes stored in child nodes are
// derived from leftPunctuationMarkInfo.Index, so they are relative to the text
// handed to the outermost call. scopeLeftPunctuationMarkList lists the opening
// marks that delimit scopes; runes without a known counterpart are ignored.
// Scanning stops at the first scope that is never closed.
func RecursiveTraitMultiPunctuationMarksContent(content string, leftPunctuationMarkInfo, rightPunctuationMarkInfo *PunctuationMarkInfo, scopeLeftPunctuationMarkList []rune, maxDeep, deep int) *NewPunctuationContent {
	punctuationContent := &NewPunctuationContent{
		Content:                   content,
		LeftPunctuationMark:       leftPunctuationMarkInfo,
		RightPunctuationMark:      rightPunctuationMarkInfo,
		SubPunctuationContentList: make([]*NewPunctuationContent, 0),
	}

	consumedLength := 0 // bytes already cut off the front of content
	for len(content) != 0 && deep != maxDeep {
		// Find the earliest opening mark. A mark sitting on the last byte
		// cannot enclose anything, so it never qualifies.
		var leftMark, rightMark rune
		found := false
		leftIndex := len(content) - 1
		for _, candidate := range scopeLeftPunctuationMarkList {
			counterpart, paired := punctuationMarkMap[candidate]
			if !paired {
				continue
			}
			if index := strings.IndexRune(content, candidate); index != -1 && index < leftIndex {
				leftIndex, leftMark, rightMark = index, candidate, counterpart
				found = true
			}
		}
		if !found {
			break
		}

		length := CalculatePunctuationMarksContentLength(content[leftIndex+1:], leftMark, rightMark, InvalidScopePunctuationMarkMap)
		rightIndex := leftIndex + 1 + length
		if rightIndex >= len(content) {
			// The scope is never closed.
			break
		}

		baseIndex := leftPunctuationMarkInfo.Index + 1 + consumedLength
		subPunctuationContent := RecursiveTraitMultiPunctuationMarksContent(content[leftIndex+1:rightIndex], &PunctuationMarkInfo{
			PunctuationMark: leftMark,
			Index:           baseIndex + leftIndex,
		}, &PunctuationMarkInfo{
			PunctuationMark: rightMark,
			Index:           baseIndex + rightIndex,
		}, scopeLeftPunctuationMarkList, maxDeep, deep+1)
		if subPunctuationContent != nil {
			punctuationContent.SubPunctuationContentList = append(punctuationContent.SubPunctuationContentList, subPunctuationContent)
		}

		content = content[rightIndex+1:]
		consumedLength += rightIndex + 1
	}

	return punctuationContent
}

// SplitContent is a node of a split result: the pieces of one scope and the
// split results of its nested scopes.
type SplitContent struct {
	ContentList         []string
	SubSplitContentList []*SplitContent
}

// RecursiveSplitUnderSameDeepPunctuationMarksContent splits content on
// splitter, ignoring every splitter that lies inside a scope delimited by one
// of the mark kinds in punctuationMarkList.
func RecursiveSplitUnderSameDeepPunctuationMarksContent(content string, punctuationMarkList []int, splitter string) *SplitContent {
	if punctuationContentNode := TraitMultiPunctuationMarksContent(content, punctuationMarkList, 1); punctuationContentNode != nil {
		return splitUnderSameDeepPunctuationMarksContent(punctuationContentNode, splitter, 0, 0)
	}
	return nil
}

// RecursiveSplitUnderSameDeepPunctuationMarksContentNode splits the content of
// an already built scope tree on splitter. punctuationContentNode must be the
// root of the tree, not one of its children; a nil node yields nil.
func RecursiveSplitUnderSameDeepPunctuationMarksContentNode(punctuationContentNode *NewPunctuationContent, splitter string) *SplitContent {
	return splitUnderSameDeepPunctuationMarksContent(punctuationContentNode, splitter, 0, 0)
}

// splitUnderSameDeepPunctuationMarksContent splits the content of
// punctuationContentNode on splitter, skipping splitters that fall inside one
// of its direct child scopes, and recurses into the children while
// deep != maxDeep.
//
// The node must be the root of its tree: the child mark indexes are relative
// to the root content and are used here as offsets into the node's own
// content, so results for deeper nodes are not reliable.
func splitUnderSameDeepPunctuationMarksContent(punctuationContentNode *NewPunctuationContent, splitter string, maxDeep, deep int) *SplitContent {
	if punctuationContentNode == nil {
		return nil
	}
	splitContentNode := &SplitContent{
		ContentList:         make([]string, 0),
		SubSplitContentList: make([]*SplitContent, 0),
	}
	content := punctuationContentNode.Content

	// leftIndex is where the current piece starts; offset skips the part of
	// the piece already known to lie inside nested scopes.
	var offset, leftIndex int
	maxCycle := len(strings.Split(content, splitter))
	for cycle := 0; cycle != maxCycle; cycle++ {
		length := strings.Index(content[leftIndex+offset:], splitter)
		if length == -1 {
			splitContentNode.ContentList = append(splitContentNode.ContentList, content[leftIndex:])
			break
		}
		rightIndex := leftIndex + offset + length

		inner := false
		for _, subNode := range punctuationContentNode.SubPunctuationContentList {
			if subNode.LeftPunctuationMark.Index <= rightIndex && rightIndex <= subNode.RightPunctuationMark.Index {
				// The splitter belongs to a nested scope: resume the search
				// right after that scope's closing mark.
				inner = true
				offset = subNode.RightPunctuationMark.Index - leftIndex + 1
				break
			}
		}
		if inner {
			continue
		}

		splitContentNode.ContentList = append(splitContentNode.ContentList, content[leftIndex:rightIndex])
		offset = 0
		leftIndex = rightIndex + len(splitter)
	}

	if deep == maxDeep {
		return splitContentNode
	}

	for _, subPunctuationContentNode := range punctuationContentNode.SubPunctuationContentList {
		if len(subPunctuationContentNode.Content) == 0 {
			continue
		}
		if subSplitContentNode := splitUnderSameDeepPunctuationMarksContent(subPunctuationContentNode, splitter, maxDeep, deep+1); subSplitContentNode != nil {
			splitContentNode.SubSplitContentList = append(splitContentNode.SubSplitContentList, subSplitContentNode)
		}
	}

	return splitContentNode
}
// ConvertSnakeCase2CamelCase converts a snake_case name to camel case:
// xxx_yyy_zzz -> XxxYyyZzz when capitalize is true, xxxYyyZzz otherwise.
//
// Only the first rune of each word is changed; empty words (leading, trailing
// or doubled underscores) are skipped, and an empty input yields "".
func ConvertSnakeCase2CamelCase(snakeCase string, capitalize bool) string {
	return ConvertSnakeCase2CamelCaseWithAbbreviation(snakeCase, capitalize, nil)
}

// ConvertSnakeCase2CamelCaseWithAbbreviation converts a snake_case name to
// camel case like ConvertSnakeCase2CamelCase, except that words found in
// abbreviationMap are copied unchanged instead of being capitalized.
//
// When capitalize is false the first rune of the result is lower-cased, also
// when the first word is an abbreviation. A nil abbreviationMap is allowed.
func ConvertSnakeCase2CamelCaseWithAbbreviation(snakeCase string, capitalize bool, abbreviationMap map[string]struct{}) string {
	var builder strings.Builder
	builder.Grow(len(snakeCase))
	for _, word := range strings.Split(snakeCase, "_") {
		if word == "" {
			// Nothing to capitalize; slicing word[:1] here would panic.
			continue
		}
		if _, isAbbreviation := abbreviationMap[word]; isAbbreviation {
			builder.WriteString(word)
			continue
		}
		builder.WriteString(mapFirstRune(word, strings.ToUpper))
	}

	camelCase := builder.String()
	if !capitalize {
		camelCase = mapFirstRune(camelCase, strings.ToLower)
	}
	return camelCase
}

// mapFirstRune returns s with convert applied to its first rune only. The
// rune is cut on its real byte width, so multi-byte runes stay intact, and an
// empty s is returned unchanged.
func mapFirstRune(s string, convert func(string) string) string {
	width := len(s)
	for index := range s {
		if index > 0 {
			// The offset of the second rune is the width of the first one.
			width = index
			break
		}
	}
	return convert(s[:width]) + s[width:]
}

// ConvertCamelCase2SnakeCase converts a camel case name to snake_case:
// XxxYyyZzz -> xxx_yyy_zzz.
//
// Every ASCII upper-case letter starts a new word and is lower-cased; all
// other runes (lower-case letters, digits, underscores, non-ASCII text) are
// copied unchanged.
func ConvertCamelCase2SnakeCase(camelCase string) string {
	var builder strings.Builder
	builder.Grow(len(camelCase))
	for index, r := range camelCase {
		if r < 'A' || r > 'Z' {
			builder.WriteRune(r)
			continue
		}
		if index != 0 {
			builder.WriteByte('_')
		}
		builder.WriteRune(r + ('a' - 'A'))
	}
	return builder.String()
}
// ConvertCamelCase2SnakeCaseWithAbbreviation converts a camel case name to
// snake_case (XxxYyyZzz -> xxx_yyy_zzz) while keeping abbreviations together:
// with "id" in abbreviationMap, UserID becomes user_id rather than user_i_d.
//
// The keys of abbreviationMap are matched against the lower-cased text, so
// they have to be lower case. Every ASCII upper-case letter is lower-cased and
// starts a new word, unless the word collected so far is a proper prefix of an
// abbreviation, in which case the letter extends that word. All other runes
// are copied unchanged into the current word. An empty input yields "" and
// the result never starts with a separator.
//
// NOTE(review): a word that merely starts like an abbreviation is merged with
// the following word (with "http" registered, "HTable" gives "htable").
func ConvertCamelCase2SnakeCaseWithAbbreviation(camelCase string, abbreviationMap map[string]struct{}) string {
	// Every proper, non-empty prefix of an abbreviation: while the current
	// word is one of these, the abbreviation may still be completed.
	abbreviationPrefixes := make(map[string]struct{})
	for abbreviation := range abbreviationMap {
		for end := 1; end < len(abbreviation); end++ {
			abbreviationPrefixes[abbreviation[:end]] = struct{}{}
		}
	}

	var builder, word strings.Builder
	// flush appends the current word to the result, separated from the
	// previous word by an underscore.
	flush := func() {
		if word.Len() == 0 {
			return
		}
		if builder.Len() > 0 {
			builder.WriteByte('_')
		}
		builder.WriteString(word.String())
		word.Reset()
	}

	for _, r := range camelCase {
		if r < 'A' || r > 'Z' {
			word.WriteRune(r)
			continue
		}
		if word.Len() > 0 {
			current := word.String()
			_, isAbbreviation := abbreviationMap[current]
			_, maybeAbbreviation := abbreviationPrefixes[current]
			if isAbbreviation || !maybeAbbreviation {
				flush()
			}
		}
		word.WriteRune(r + ('a' - 'A'))
	}
	flush()
	return builder.String()
}

// SplitAny splits content into groups on groupSplitter and every group into
// items on itemSplitter.
//
// An empty content yields an empty, non-nil slice. A splitter at the very end
// of content is a splitter like any other: it closes the current item and
// leaves an empty item (or an empty group) behind it.
func SplitAny(groupSplitter, itemSplitter byte, content string) [][]string {
	groupSlice := make([][]string, 0)
	if len(content) == 0 {
		return groupSlice
	}

	// string(b) on a byte would encode it as a code point; go through a byte
	// slice so that bytes >= 0x80 stay single bytes.
	groupSeparator := string([]byte{groupSplitter})
	itemSeparator := string([]byte{itemSplitter})
	for _, group := range strings.Split(content, groupSeparator) {
		groupSlice = append(groupSlice, strings.Split(group, itemSeparator))
	}
	return groupSlice
}

// CleanFileComment returns fileContent with every line comment removed: the
// text from "//" up to, but not including, the end of the line.
//
// "//" inside a double-quoted string, a single-quoted rune literal or a
// back-quoted raw string is not a comment. Backslash escapes are honoured in
// the first two, which also end at the end of the line if left unterminated.
// Block comments (/* ... */) are left untouched.
func CleanFileComment(fileContent []byte) string {
	var builder strings.Builder
	builder.Grow(len(fileContent))

	var quote byte // delimiter of the literal being copied, 0 outside literals
	for index := 0; index < len(fileContent); index++ {
		b := fileContent[index]
		switch {
		case quote != 0:
			builder.WriteByte(b)
			switch {
			case b == quote:
				quote = 0
			case quote == '`':
				// Raw strings have no escapes and may span lines.
			case b == '\\' && index+1 < len(fileContent):
				// Copy the escaped byte so that \" or \' cannot close the literal.
				index++
				builder.WriteByte(fileContent[index])
			case b == '\n':
				// Unterminated literal: do not let it swallow the rest of the file.
				quote = 0
			}
		case b == '"' || b == '\'' || b == '`':
			quote = b
			builder.WriteByte(b)
		case b == '/' && index+1 < len(fileContent) && fileContent[index+1] == '/':
			// Skip the comment; the line break that ends it is kept.
			for index+1 < len(fileContent) && fileContent[index+1] != '\n' {
				index++
			}
		default:
			builder.WriteByte(b)
		}
	}
	return builder.String()
}