github.com/ice-blockchain/go/src@v0.0.0-20240403114104-1564d284e521/regexp/syntax/simplify_test.go (about) 1 // Copyright 2011 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package syntax 6 7 import "testing" 8 9 var simplifyTests = []struct { 10 Regexp string 11 Simple string 12 }{ 13 // Already-simple constructs 14 {`a`, `a`}, 15 {`ab`, `ab`}, 16 {`a|b`, `[ab]`}, 17 {`ab|cd`, `ab|cd`}, 18 {`(ab)*`, `(ab)*`}, 19 {`(ab)+`, `(ab)+`}, 20 {`(ab)?`, `(ab)?`}, 21 {`.`, `(?s:.)`}, 22 {`^`, `(?m:^)`}, 23 {`$`, `(?m:$)`}, 24 {`[ac]`, `[ac]`}, 25 {`[^ac]`, `[^ac]`}, 26 27 // Posix character classes 28 {`[[:alnum:]]`, `[0-9A-Za-z]`}, 29 {`[[:alpha:]]`, `[A-Za-z]`}, 30 {`[[:blank:]]`, `[\t ]`}, 31 {`[[:cntrl:]]`, `[\x00-\x1f\x7f]`}, 32 {`[[:digit:]]`, `[0-9]`}, 33 {`[[:graph:]]`, `[!-~]`}, 34 {`[[:lower:]]`, `[a-z]`}, 35 {`[[:print:]]`, `[ -~]`}, 36 {`[[:punct:]]`, "[!-/:-@\\[-`\\{-~]"}, 37 {`[[:space:]]`, `[\t-\r ]`}, 38 {`[[:upper:]]`, `[A-Z]`}, 39 {`[[:xdigit:]]`, `[0-9A-Fa-f]`}, 40 41 // Perl character classes 42 {`\d`, `[0-9]`}, 43 {`\s`, `[\t\n\f\r ]`}, 44 {`\w`, `[0-9A-Z_a-z]`}, 45 {`\D`, `[^0-9]`}, 46 {`\S`, `[^\t\n\f\r ]`}, 47 {`\W`, `[^0-9A-Z_a-z]`}, 48 {`[\d]`, `[0-9]`}, 49 {`[\s]`, `[\t\n\f\r ]`}, 50 {`[\w]`, `[0-9A-Z_a-z]`}, 51 {`[\D]`, `[^0-9]`}, 52 {`[\S]`, `[^\t\n\f\r ]`}, 53 {`[\W]`, `[^0-9A-Z_a-z]`}, 54 55 // Posix repetitions 56 {`a{1}`, `a`}, 57 {`a{2}`, `aa`}, 58 {`a{5}`, `aaaaa`}, 59 {`a{0,1}`, `a?`}, 60 // The next three are illegible because Simplify inserts (?:) 61 // parens instead of () parens to avoid creating extra 62 // captured subexpressions. The comments show a version with fewer parens. 63 {`(a){0,2}`, `(?:(a)(a)?)?`}, // (aa?)? 64 {`(a){0,4}`, `(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // (a(a(aa?)?)?)? 65 {`(a){2,6}`, `(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?`}, // aa(a(a(aa?)?)?)? 66 {`a{0,2}`, `(?:aa?)?`}, // (aa?)? 67 {`a{0,4}`, `(?:a(?:a(?:aa?)?)?)?`}, // (a(a(aa?)?)?)? 68 {`a{2,6}`, `aa(?:a(?:a(?:aa?)?)?)?`}, // aa(a(a(aa?)?)?)? 69 {`a{0,}`, `a*`}, 70 {`a{1,}`, `a+`}, 71 {`a{2,}`, `aa+`}, 72 {`a{5,}`, `aaaaa+`}, 73 74 // Test that operators simplify their arguments. 75 {`(?:a{1,}){1,}`, `a+`}, 76 {`(a{1,}b{1,})`, `(a+b+)`}, 77 {`a{1,}|b{1,}`, `a+|b+`}, 78 {`(?:a{1,})*`, `(?:a+)*`}, 79 {`(?:a{1,})+`, `a+`}, 80 {`(?:a{1,})?`, `(?:a+)?`}, 81 {``, `(?:)`}, 82 {`a{0}`, `(?:)`}, 83 84 // Character class simplification 85 {`[ab]`, `[ab]`}, 86 {`[abc]`, `[a-c]`}, 87 {`[a-za-za-z]`, `[a-z]`}, 88 {`[A-Za-zA-Za-z]`, `[A-Za-z]`}, 89 {`[ABCDEFGH]`, `[A-H]`}, 90 {`[AB-CD-EF-GH]`, `[A-H]`}, 91 {`[W-ZP-XE-R]`, `[E-Z]`}, 92 {`[a-ee-gg-m]`, `[a-m]`}, 93 {`[a-ea-ha-m]`, `[a-m]`}, 94 {`[a-ma-ha-e]`, `[a-m]`}, 95 {`[a-zA-Z0-9 -~]`, `[ -~]`}, 96 97 // Empty character classes 98 {`[^[:cntrl:][:^cntrl:]]`, `[^\x00-\x{10FFFF}]`}, 99 100 // Full character classes 101 {`[[:cntrl:][:^cntrl:]]`, `(?s:.)`}, 102 103 // Unicode case folding. 104 {`(?i)A`, `(?i:A)`}, 105 {`(?i)a`, `(?i:A)`}, 106 {`(?i)[A]`, `(?i:A)`}, 107 {`(?i)[a]`, `(?i:A)`}, 108 {`(?i)K`, `(?i:K)`}, 109 {`(?i)k`, `(?i:K)`}, 110 {`(?i)\x{212a}`, "(?i:K)"}, 111 {`(?i)[K]`, "[Kk\u212A]"}, 112 {`(?i)[k]`, "[Kk\u212A]"}, 113 {`(?i)[\x{212a}]`, "[Kk\u212A]"}, 114 {`(?i)[a-z]`, "[A-Za-z\u017F\u212A]"}, 115 {`(?i)[\x00-\x{FFFD}]`, "[\\x00-\uFFFD]"}, 116 {`(?i)[\x00-\x{10FFFF}]`, `(?s:.)`}, 117 118 // Empty string as a regular expression. 119 // The empty string must be preserved inside parens in order 120 // to make submatches work right, so these tests are less 121 // interesting than they might otherwise be. String inserts 122 // explicit (?:) in place of non-parenthesized empty strings, 123 // to make them easier to spot for other parsers. 124 {`(a|b|c|)`, `([a-c]|(?:))`}, 125 {`(a|b|)`, `([ab]|(?:))`}, 126 {`(|)`, `()`}, 127 {`a()`, `a()`}, 128 {`(()|())`, `(()|())`}, 129 {`(a|)`, `(a|(?:))`}, 130 {`ab()cd()`, `ab()cd()`}, 131 {`()`, `()`}, 132 {`()*`, `()*`}, 133 {`()+`, `()+`}, 134 {`()?`, `()?`}, 135 {`(){0}`, `(?:)`}, 136 {`(){1}`, `()`}, 137 {`(){1,}`, `()+`}, 138 {`(){0,2}`, `(?:()()?)?`}, 139 } 140 141 func TestSimplify(t *testing.T) { 142 for _, tt := range simplifyTests { 143 re, err := Parse(tt.Regexp, MatchNL|Perl&^OneLine) 144 if err != nil { 145 t.Errorf("Parse(%#q) = error %v", tt.Regexp, err) 146 continue 147 } 148 s := re.Simplify().String() 149 if s != tt.Simple { 150 t.Errorf("Simplify(%#q) = %#q, want %#q", tt.Regexp, s, tt.Simple) 151 } 152 } 153 }