github.com/benhoyt/goawk@v1.8.1/testdata/gawk/gsubtst2.awk (about)

     1  #From arnold  Thu May  9 17:27:03 2002
     2  #Return-Path: <arnold@skeeve.com>
     3  #Received: (from arnold@localhost)
     4  #	by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
     5  #	for arnold; Thu, 9 May 2002 17:27:03 +0300
     6  #Date: Thu, 9 May 2002 17:27:03 +0300
     7  #From: Aharon Robbins <arnold@skeeve.com>
     8  #Message-Id: <200205091427.g49ER3K27925@skeeve.com>
     9  #To: arnold@skeeve.com
    10  #Subject: fixme
    11  #X-SpamBouncer: 1.4 (10/07/01)
    12  #X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
    13  #X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
    14  #X-SBClass: Blocked
    15  #Status: O
    16  #
    17  #Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
    18  #From: laura@madonnaweb.com (laura fairhead)
    19  #Newsgroups: comp.lang.awk
    20  #Subject: bug in gawk3.1.0 regex code
    21  #Date: Wed, 08 May 2002 23:31:40 GMT
    22  #Organization: that'll be the daewooo :)
    23  #Lines: 211
    24  #Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
    25  #Reply-To: laura@madonnaweb.com
    26  #NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
    27  #X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
    28  #X-Newsreader: Forte Free Agent 1.21/32.243
    29  #Xref: dfw-artgen comp.lang.awk:13059
    30  #
    31  #
    32  #I believe I've just found a bug in gawk3.1.0 implementation of
    33  #extended regular expressions. It seems to be down to the alternation
    34  #operator; when using an end anchor '$' as a subexpression in an
    35  #alternation and the entire matched RE is a nul-string it fails
    36  #to match the end of string, for example;
    37  #
    38  #gsub(/$|2/,"x")
    39  #print
    40  #
    41  #input           = 12345
    42  #expected output = 1x345x
    43  #actual output   = 1x345
    44  #
    45  #The start anchor '^' always works as expected;
    46  #
    47  #gsub(/^|2/,"x")
    48  #print
    49  #
    50  #input           = 12345
    51  #expected output = x1x345
    52  #actual output   = x1x345
    53  #
    54  #This was with POSIX compliance enabled although that doesn't
    55  #affect the result.
    56  #
    57  #I checked on gawk3.0.6 and got exactly the same results however
    58  #gawk2.15.6 gives the expected results.
    59  #
    60  #I'm about to post a bug report about this into gnu.utils.bug
    61  #but I thought I'd post it here first in case anyone has
    62  #any input/comments/whatever ....
    63  #
    64  #Complete test results were as follows;
    65  #
    66  #input          12345
    67  #output         gsub(/regex/,"x",input)
    68  #
    69  #regex          output
    70  #(^)            x12345
    71  #($)            12345x
    72  #(^)|($)        x12345x
    73  #($)|(^)        x12345x
    74  #(2)            1x345
    75  #(^)|2          x1x345
    76  #2|(^)          x1x345
    77  #($)|2          1x345
    78  #2|($)          1x345
    79  #(2)|(^)        x1x345
    80  #(^)|(2)        x1x345
    81  #(2)|($)        1x345
    82  #($)|(2)        1x345
    83  #.((2)|(^))     x345
    84  #.((^)|(2))     x345
    85  #.((2)|($))     x34x
    86  #.(($)|(2))     x34x
    87  #x{0}((2)|(^))  x1x345
    88  #x{0}((^)|(2))  x1x345
    89  #x{0}((2)|($))  1x345
    90  #x{0}(($)|(2))  1x345
    91  #x*((2)|(^))    x1x345
    92  #x*((^)|(2))    x1x345
    93  #x*((2)|($))    1x345
    94  #x*(($)|(2))    1x345
    95  #
    96  #Here's the test program I used, a few of the cases use ERE {n[,[m]]}
    97  #operators so that will have to be commented out or have a check
    98  #added or something (should have put a conditional in I know... ;-)
    99  #
   100  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   101  #
   102  BEGIN{  # Driver: run gsub() with each anchor/alternation regex on TESTSTR and print one table row per regex.
   103  # Subject string; every case below resets $0 to this value before substituting.
   104  TESTSTR="12345"
   105  # Table preamble: echo the input and describe the operation being tabulated.
   106  print "input          "TESTSTR
   107  print "output         gsub(/regex/,\"x\",input)"
   108  print ""
   109  # Column header. Each case then does: reset $0, gsub on $0 (no target argument), print a fixed-width label followed by $0.
   110  print "regex          output"
   111  $0=TESTSTR
   112  gsub(/(^)/,"x")
   113  print "(^)            "$0
   114  # Bare end anchor, then both anchors alternated in either order.
   115  $0=TESTSTR
   116  gsub(/($)/,"x")
   117  print "($)            "$0
   118  
   119  $0=TESTSTR
   120  gsub(/(^)|($)/,"x")
   121  print "(^)|($)        "$0
   122  
   123  $0=TESTSTR
   124  gsub(/($)|(^)/,"x")
   125  print "($)|(^)        "$0
   126  # Literal 2 on its own, then alternated with an anchor, with and without grouping parentheses.
   127  $0=TESTSTR
   128  gsub(/2/,"x")  # NOTE(review): the row label below reads (2) but this regex has no parentheses - confirm intentional
   129  print "(2)            "$0
   130  
   131  $0=TESTSTR
   132  gsub(/(^)|2/,"x")
   133  print "(^)|2          "$0
   134  
   135  $0=TESTSTR
   136  gsub(/2|(^)/,"x")
   137  print "2|(^)          "$0
   138  
   139  $0=TESTSTR
   140  gsub(/($)|2/,"x")
   141  print "($)|2          "$0
   142  
   143  $0=TESTSTR
   144  gsub(/2|($)/,"x")
   145  print "2|($)          "$0
   146  
   147  $0=TESTSTR
   148  gsub(/(2)|(^)/,"x")
   149  print "(2)|(^)        "$0
   150  
   151  $0=TESTSTR
   152  gsub(/(^)|(2)/,"x")
   153  print "(^)|(2)        "$0
   154  
   155  $0=TESTSTR
   156  gsub(/(2)|($)/,"x")
   157  print "(2)|($)        "$0
   158  
   159  $0=TESTSTR
   160  gsub(/($)|(2)/,"x")
   161  print "($)|(2)        "$0
   162  # Same alternations preceded by a dot, so each match must first consume one character.
   163  $0=TESTSTR
   164  gsub(/.((2)|(^))/,"x")
   165  print ".((2)|(^))     "$0
   166  
   167  $0=TESTSTR
   168  gsub(/.((^)|(2))/,"x")
   169  print ".((^)|(2))     "$0
   170  
   171  $0=TESTSTR
   172  gsub(/.((2)|($))/,"x")
   173  print ".((2)|($))     "$0
   174  
   175  $0=TESTSTR
   176  gsub(/.(($)|(2))/,"x")
   177  print ".(($)|(2))     "$0
   178  # Disabled cases: these use the x{0} interval operator, which the header note says may need commenting out.
   179  # $0=TESTSTR
   180  # gsub(/x{0}((2)|(^))/,"x")
   181  # print "x{0}((2)|(^))  "$0
   182  # 
   183  # $0=TESTSTR
   184  # gsub(/x{0}((^)|(2))/,"x")
   185  # print "x{0}((^)|(2))  "$0
   186  # 
   187  # $0=TESTSTR
   188  # gsub(/x{0}((2)|($))/,"x")
   189  # print "x{0}((2)|($))  "$0
   190  # 
   191  # $0=TESTSTR
   192  # gsub(/x{0}(($)|(2))/,"x")
   193  # print "x{0}(($)|(2))  "$0
   194  # Same alternations preceded by x*, which can match the empty string since TESTSTR contains no x.
   195  $0=TESTSTR
   196  gsub(/x*((2)|(^))/,"x")
   197  print "x*((2)|(^))    "$0
   198  
   199  $0=TESTSTR
   200  gsub(/x*((^)|(2))/,"x")
   201  print "x*((^)|(2))    "$0
   202  
   203  $0=TESTSTR
   204  gsub(/x*((2)|($))/,"x")
   205  print "x*((2)|($))    "$0
   206  
   207  $0=TESTSTR
   208  gsub(/x*(($)|(2))/,"x")
   209  print "x*(($)|(2))    "$0
   210  # Disabled cases: x{0} directly before an anchor, alone and alternated with 2 (interval operator again).
   211  # $0=TESTSTR
   212  # gsub(/x{0}^/,"x")
   213  # print "x{0}^          "$0
   214  # 
   215  # $0=TESTSTR
   216  # gsub(/x{0}$/,"x")
   217  # print "x{0}$          "$0
   218  # 
   219  # $0=TESTSTR
   220  # gsub(/(x{0}^)|2/,"x")
   221  # print "(x{0}^)|2      "$0
   222  # 
   223  # $0=TESTSTR
   224  # gsub(/(x{0}$)|2/,"x")
   225  # print "(x{0}$)|2      "$0
   226  
   227  
   228  }
   229  #
   230  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   231  #
   232  #byefrom
   233  #
   234  #-- 
   235  #laura fairhead  # laura@madonnaweb.com  http://lf.8k.com
   236  #                # if you are bored crack my sig.
   237  #1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
   238  #EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
   239  #630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
   240  #8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
   241  #80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492