github.com/benhoyt/goawk@v1.8.1/testdata/gawk/gsubtst4.awk (about) 1 # From arnold Thu May 9 17:27:03 2002 2 # Return-Path: <arnold@skeeve.com> 3 # Received: (from arnold@localhost) 4 # by skeeve.com (8.11.6/8.11.6) id g49ER3K27925 5 # for arnold; Thu, 9 May 2002 17:27:03 +0300 6 # Date: Thu, 9 May 2002 17:27:03 +0300 7 # From: Aharon Robbins <arnold@skeeve.com> 8 # Message-Id: <200205091427.g49ER3K27925@skeeve.com> 9 # To: arnold@skeeve.com 10 # Subject: fixme 11 # X-SpamBouncer: 1.4 (10/07/01) 12 # X-SBRule: Pattern Match (Other Patterns) (Score: 4850) 13 # X-SBRule: Pattern Match (Spam Phone #) (Score: 0) 14 # X-SBClass: Blocked 15 # Status: RO 16 # 17 # Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail 18 # From: laura@madonnaweb.com (laura fairhead) 19 # Newsgroups: comp.lang.awk 20 # Subject: bug in gawk3.1.0 regex code 21 # Date: Wed, 08 May 2002 23:31:40 GMT 22 # Organization: that'll be the daewooo :) 23 # Lines: 211 24 # Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE> 25 # Reply-To: laura@madonnaweb.com 26 # NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48) 27 # X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286]) 28 # X-Newsreader: Forte Free Agent 1.21/32.243 29 # Xref: dfw-artgen comp.lang.awk:13059 30 # 31 # 32 # I believe I've just found a bug in gawk3.1.0 implementation of 33 # extended regular expressions. 
It seems to be down to the alternation 34 # operator; when using an end anchor '$' as a subexpression in an 35 # alternation and the entire matched RE is a nul-string it fails 36 # to match the end of string, for example; 37 # 38 # gsub(/$|2/,"x") 39 # print 40 # 41 # input = 12345 42 # expected output = 1x345x 43 # actual output = 1x345 44 # 45 # The start anchor '^' always works as expected; 46 # 47 # gsub(/^|2/,"x") 48 # print 49 # 50 # input = 12345 51 # expected output = x1x345 52 # actual output = x1x345 53 # 54 # This was with POSIX compliance enabled although that doesn't 55 # affect the result. 56 # 57 # I checked on gawk3.0.6 and got exactly the same results however 58 # gawk2.15.6 gives the expected results. 59 # 60 # I'm about to post a bug report about this into gnu.utils.bug 61 # but I thought I'd post it here first in case anyone has 62 # any input/comments/whatever .... 63 # 64 # Complete test results were as follows; 65 # 66 # input 12345 67 # output gsub(/regex/,"x",input) 68 # 69 # regex output 70 # (^) x12345 71 # ($) 12345x 72 # (^)|($) x12345x 73 # ($)|(^) x12345x 74 # (2) 1x345 75 # (^)|2 x1x345 76 # 2|(^) x1x345 77 # ($)|2 1x345 78 # 2|($) 1x345 79 # (2)|(^) x1x345 80 # (^)|(2) x1x345 81 # (2)|($) 1x345 82 # ($)|(2) 1x345 83 # .((2)|(^)) x345 84 # .((^)|(2)) x345 85 # .((2)|($)) x34x 86 # .(($)|(2)) x34x 87 # x{0}((2)|(^)) x1x345 88 # x{0}((^)|(2)) x1x345 89 # x{0}((2)|($)) 1x345 90 # x{0}(($)|(2)) 1x345 91 # x*((2)|(^)) x1x345 92 # x*((^)|(2)) x1x345 93 # x*((2)|($)) 1x345 94 # x*(($)|(2)) 1x345 95 # 96 # Here's the test program I used, a few of the cases use ERE {n[,[m]]} 97 # operators so that will have to be commented out or have a check 98 # added or something (should have put a conditional in I know... 
# ;-)
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#

# Emit one result row: the given label immediately followed by the current
# record ($0), which the caller has just rewritten with gsub().
function show(label)
{
	print label $0
}

# Runs gsub() with a series of regexes that combine the anchors ^ and $ with
# alternation, grouping, x{0} and x*, each time against a fresh copy of
# TESTSTR loaded into $0, and prints one labelled row per regex.
BEGIN {
	TESTSTR = "12345"

	# Report header.
	print "input " TESTSTR
	print "output gsub(/regex/,\"x\",input)"
	print ""
	print "regex output"

	# Bare anchors, alone and alternated with each other.
	$0 = TESTSTR; gsub(/(^)/, "x");     show("(^) ")
	$0 = TESTSTR; gsub(/($)/, "x");     show("($) ")
	$0 = TESTSTR; gsub(/(^)|($)/, "x"); show("(^)|($) ")
	$0 = TESTSTR; gsub(/($)|(^)/, "x"); show("($)|(^) ")

	# Plain literal match. The row is labelled "(2)" although the regex
	# used here is /2/ without parentheses.
	$0 = TESTSTR; gsub(/2/, "x");       show("(2) ")

	# An anchor alternated with an unparenthesised literal.
	$0 = TESTSTR; gsub(/(^)|2/, "x");   show("(^)|2 ")
	$0 = TESTSTR; gsub(/2|(^)/, "x");   show("2|(^) ")
	$0 = TESTSTR; gsub(/($)|2/, "x");   show("($)|2 ")
	$0 = TESTSTR; gsub(/2|($)/, "x");   show("2|($) ")

	# An anchor alternated with a parenthesised literal.
	$0 = TESTSTR; gsub(/(2)|(^)/, "x"); show("(2)|(^) ")
	$0 = TESTSTR; gsub(/(^)|(2)/, "x"); show("(^)|(2) ")
	$0 = TESTSTR; gsub(/(2)|($)/, "x"); show("(2)|($) ")
	$0 = TESTSTR; gsub(/($)|(2)/, "x"); show("($)|(2) ")

	# The alternation preceded by "." (any single character).
	$0 = TESTSTR; gsub(/.((2)|(^))/, "x"); show(".((2)|(^)) ")
	$0 = TESTSTR; gsub(/.((^)|(2))/, "x"); show(".((^)|(2)) ")
	$0 = TESTSTR; gsub(/.((2)|($))/, "x"); show(".((2)|($)) ")
	$0 = TESTSTR; gsub(/.(($)|(2))/, "x"); show(".(($)|(2)) ")

	# The alternation preceded by the interval expression x{0}.
	$0 = TESTSTR; gsub(/x{0}((2)|(^))/, "x"); show("x{0}((2)|(^)) ")
	$0 = TESTSTR; gsub(/x{0}((^)|(2))/, "x"); show("x{0}((^)|(2)) ")
	$0 = TESTSTR; gsub(/x{0}((2)|($))/, "x"); show("x{0}((2)|($)) ")
	$0 = TESTSTR; gsub(/x{0}(($)|(2))/, "x"); show("x{0}(($)|(2)) ")

	# The alternation preceded by x*.
	$0 = TESTSTR; gsub(/x*((2)|(^))/, "x"); show("x*((2)|(^)) ")
	$0 = TESTSTR; gsub(/x*((^)|(2))/, "x"); show("x*((^)|(2)) ")
	$0 = TESTSTR; gsub(/x*((2)|($))/, "x"); show("x*((2)|($)) ")
	$0 = TESTSTR; gsub(/x*(($)|(2))/, "x"); show("x*(($)|(2)) ")

	# x{0} placed directly in front of an anchor, alone and alternated with 2.
	$0 = TESTSTR; gsub(/x{0}^/, "x");     show("x{0}^ ")
	$0 = TESTSTR; gsub(/x{0}$/, "x");     show("x{0}$ ")
	$0 = TESTSTR; gsub(/(x{0}^)|2/, "x"); show("(x{0}^)|2 ")
	$0 = TESTSTR; gsub(/(x{0}$)|2/, "x"); show("(x{0}$)|2 ")
}
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# byefrom
#
# --
# laura fairhead # laura@madonnaweb.com http://lf.8k.com
# # if you are bored crack my sig.
# 1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
# EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
# 630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
# 8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
# 80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
#