github.com/benhoyt/goawk@v1.8.1/testdata/gawk/gsubtst2.awk (about) 1 #From arnold Thu May 9 17:27:03 2002 2 #Return-Path: <arnold@skeeve.com> 3 #Received: (from arnold@localhost) 4 # by skeeve.com (8.11.6/8.11.6) id g49ER3K27925 5 # for arnold; Thu, 9 May 2002 17:27:03 +0300 6 #Date: Thu, 9 May 2002 17:27:03 +0300 7 #From: Aharon Robbins <arnold@skeeve.com> 8 #Message-Id: <200205091427.g49ER3K27925@skeeve.com> 9 #To: arnold@skeeve.com 10 #Subject: fixme 11 #X-SpamBouncer: 1.4 (10/07/01) 12 #X-SBRule: Pattern Match (Other Patterns) (Score: 4850) 13 #X-SBRule: Pattern Match (Spam Phone #) (Score: 0) 14 #X-SBClass: Blocked 15 #Status: O 16 # 17 #Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail 18 #From: laura@madonnaweb.com (laura fairhead) 19 #Newsgroups: comp.lang.awk 20 #Subject: bug in gawk3.1.0 regex code 21 #Date: Wed, 08 May 2002 23:31:40 GMT 22 #Organization: that'll be the daewooo :) 23 #Lines: 211 24 #Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE> 25 #Reply-To: laura@madonnaweb.com 26 #NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48) 27 #X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286]) 28 #X-Newsreader: Forte Free Agent 1.21/32.243 29 #Xref: dfw-artgen comp.lang.awk:13059 30 # 31 # 32 #I believe I've just found a bug in gawk3.1.0 implementation of 33 #extended regular expressions. 
It seems to be down to the alternation 34 #operator; when using an end anchor '$' as a subexpression in an 35 #alternation and the entire matched RE is a nul-string it fails 36 #to match the end of string, for example; 37 # 38 #gsub(/$|2/,"x") 39 #print 40 # 41 #input = 12345 42 #expected output = 1x345x 43 #actual output = 1x345 44 # 45 #The start anchor '^' always works as expected; 46 # 47 #gsub(/^|2/,"x") 48 #print 49 # 50 #input = 12345 51 #expected output = x1x345 52 #actual output = x1x345 53 # 54 #This was with POSIX compliance enabled although that doesn't 55 #affect the result. 56 # 57 #I checked on gawk3.0.6 and got exactly the same results however 58 #gawk2.15.6 gives the expected results. 59 # 60 #I'm about to post a bug report about this into gnu.utils.bug 61 #but I thought I'd post it here first in case anyone has 62 #any input/comments/whatever .... 63 # 64 #Complete test results were as follows; 65 # 66 #input 12345 67 #output gsub(/regex/,"x",input) 68 # 69 #regex output 70 #(^) x12345 71 #($) 12345x 72 #(^)|($) x12345x 73 #($)|(^) x12345x 74 #(2) 1x345 75 #(^)|2 x1x345 76 #2|(^) x1x345 77 #($)|2 1x345 78 #2|($) 1x345 79 #(2)|(^) x1x345 80 #(^)|(2) x1x345 81 #(2)|($) 1x345 82 #($)|(2) 1x345 83 #.((2)|(^)) x345 84 #.((^)|(2)) x345 85 #.((2)|($)) x34x 86 #.(($)|(2)) x34x 87 #x{0}((2)|(^)) x1x345 88 #x{0}((^)|(2)) x1x345 89 #x{0}((2)|($)) 1x345 90 #x{0}(($)|(2)) 1x345 91 #x*((2)|(^)) x1x345 92 #x*((^)|(2)) x1x345 93 #x*((2)|($)) 1x345 94 #x*(($)|(2)) 1x345 95 # 96 #Here's the test program I used, a few of the cases use ERE {n[,[m]]} 97 #operators so that will have to be commented out or have a check 98 #added or something (should have put a conditional in I know... 
# ;-)
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# Test driver: for every regex below, reset $0 to TESTSTR, replace each
# match with "x" via gsub() on $0, and print the regex label followed by
# the resulting record.  Each label is the literal text of the regex that
# the case runs, so the output can be compared line by line against the
# results table in the report above.
BEGIN{

    TESTSTR="12345"

    # Header lines of the results table.
    print "input "TESTSTR
    print "output gsub(/regex/,\"x\",input)"
    print ""

    print "regex output"

    # --- Anchors on their own and alternated with each other ---
    $0=TESTSTR
    gsub(/(^)/,"x")
    print "(^) "$0

    $0=TESTSTR
    gsub(/($)/,"x")
    print "($) "$0

    $0=TESTSTR
    gsub(/(^)|($)/,"x")
    print "(^)|($) "$0

    $0=TESTSTR
    gsub(/($)|(^)/,"x")
    print "($)|(^) "$0

    # --- A literal alternated with an anchor ---
    # The regex is parenthesised so that it matches its "(2)" label, as
    # every other case in this file does.
    $0=TESTSTR
    gsub(/(2)/,"x")
    print "(2) "$0

    $0=TESTSTR
    gsub(/(^)|2/,"x")
    print "(^)|2 "$0

    $0=TESTSTR
    gsub(/2|(^)/,"x")
    print "2|(^) "$0

    $0=TESTSTR
    gsub(/($)|2/,"x")
    print "($)|2 "$0

    $0=TESTSTR
    gsub(/2|($)/,"x")
    print "2|($) "$0

    $0=TESTSTR
    gsub(/(2)|(^)/,"x")
    print "(2)|(^) "$0

    $0=TESTSTR
    gsub(/(^)|(2)/,"x")
    print "(^)|(2) "$0

    $0=TESTSTR
    gsub(/(2)|($)/,"x")
    print "(2)|($) "$0

    $0=TESTSTR
    gsub(/($)|(2)/,"x")
    print "($)|(2) "$0

    # --- Alternation preceded by a single-character match ---
    $0=TESTSTR
    gsub(/.((2)|(^))/,"x")
    print ".((2)|(^)) "$0

    $0=TESTSTR
    gsub(/.((^)|(2))/,"x")
    print ".((^)|(2)) "$0

    $0=TESTSTR
    gsub(/.((2)|($))/,"x")
    print ".((2)|($)) "$0

    $0=TESTSTR
    gsub(/.(($)|(2))/,"x")
    print ".(($)|(2)) "$0

    # Disabled: these cases use the ERE interval operator {n}.  The report
    # above notes that such cases have to be commented out or guarded.
#    $0=TESTSTR
#    gsub(/x{0}((2)|(^))/,"x")
#    print "x{0}((2)|(^)) "$0
#
#    $0=TESTSTR
#    gsub(/x{0}((^)|(2))/,"x")
#    print "x{0}((^)|(2)) "$0
#
#    $0=TESTSTR
#    gsub(/x{0}((2)|($))/,"x")
#    print "x{0}((2)|($)) "$0
#
#    $0=TESTSTR
#    gsub(/x{0}(($)|(2))/,"x")
#    print "x{0}(($)|(2)) "$0

    # --- Alternation preceded by a possibly-empty x* ---
    $0=TESTSTR
    gsub(/x*((2)|(^))/,"x")
    print "x*((2)|(^)) "$0

    $0=TESTSTR
    gsub(/x*((^)|(2))/,"x")
    print "x*((^)|(2)) "$0

    $0=TESTSTR
    gsub(/x*((2)|($))/,"x")
    print "x*((2)|($)) "$0

    $0=TESTSTR
    gsub(/x*(($)|(2))/,"x")
    print "x*(($)|(2)) "$0

    # Disabled: further interval-operator cases, kept for reference.
#    $0=TESTSTR
#    gsub(/x{0}^/,"x")
#    print "x{0}^ "$0
#
#    $0=TESTSTR
#    gsub(/x{0}$/,"x")
#    print "x{0}$ "$0
#
#    $0=TESTSTR
#    gsub(/(x{0}^)|2/,"x")
#    print "(x{0}^)|2 "$0
#
#    $0=TESTSTR
#    gsub(/(x{0}$)|2/,"x")
#    print "(x{0}$)|2 "$0


}
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
#byefrom
#
#--
#laura fairhead # laura@madonnaweb.com http://lf.8k.com
# # if you are bored crack my sig.
#1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
#EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
#630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
#8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
#80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492