github.com/benhoyt/goawk@v1.8.1/testdata/gawk/gsubtst4.awk (about)

     1  # From arnold  Thu May  9 17:27:03 2002
     2  # Return-Path: <arnold@skeeve.com>
     3  # Received: (from arnold@localhost)
     4  # 	by skeeve.com (8.11.6/8.11.6) id g49ER3K27925
     5  # 	for arnold; Thu, 9 May 2002 17:27:03 +0300
     6  # Date: Thu, 9 May 2002 17:27:03 +0300
     7  # From: Aharon Robbins <arnold@skeeve.com>
     8  # Message-Id: <200205091427.g49ER3K27925@skeeve.com>
     9  # To: arnold@skeeve.com
    10  # Subject: fixme
    11  # X-SpamBouncer: 1.4 (10/07/01)
    12  # X-SBRule: Pattern Match (Other Patterns) (Score: 4850)
    13  # X-SBRule: Pattern Match (Spam Phone #) (Score: 0)
    14  # X-SBClass: Blocked
    15  # Status: RO
    16  # 
    17  # Path: ord-read.news.verio.net!dfw-artgen!iad-peer.news.verio.net!news.verio.net!fu-berlin.de!uni-berlin.de!host213-120-137-48.in-addr.btopenworld.COM!not-for-mail
    18  # From: laura@madonnaweb.com (laura fairhead)
    19  # Newsgroups: comp.lang.awk
    20  # Subject: bug in gawk3.1.0 regex code
    21  # Date: Wed, 08 May 2002 23:31:40 GMT
    22  # Organization: that'll be the daewooo :)
    23  # Lines: 211
    24  # Message-ID: <3cd9b0f7.29675926@NEWS.CIS.DFN.DE>
    25  # Reply-To: laura@madonnaweb.com
    26  # NNTP-Posting-Host: host213-120-137-48.in-addr.btopenworld.com (213.120.137.48)
    27  # X-Trace: fu-berlin.de 1020900891 18168286 213.120.137.48 (16 [53286])
    28  # X-Newsreader: Forte Free Agent 1.21/32.243
    29  # Xref: dfw-artgen comp.lang.awk:13059
    30  # 
    31  # 
    32  # I believe I've just found a bug in gawk3.1.0 implementation of
    33  # extended regular expressions. It seems to be down to the alternation
    34  # operator; when using an end anchor '$' as a subexpression in an
    35  # alternation and the entire matched RE is a nul-string it fails
    36  # to match the end of string, for example;
    37  # 
    38  # gsub(/$|2/,"x")
    39  # print
    40  # 
    41  # input           = 12345
    42  # expected output = 1x345x
    43  # actual output   = 1x345
    44  # 
    45  # The start anchor '^' always works as expected;
    46  # 
    47  # gsub(/^|2/,"x")
    48  # print
    49  # 
    50  # input           = 12345
    51  # expected output = x1x345
    52  # actual output   = x1x345
    53  # 
    54  # This was with POSIX compliance enabled although that doesn't
    55  # affect the result.
    56  # 
    57  # I checked on gawk3.0.6 and got exactly the same results however
    58  # gawk2.15.6 gives the expected results.
    59  # 
    60  # I'm about to post a bug report about this into gnu.utils.bug
    61  # but I thought I'd post it here first in case anyone has
    62  # any input/comments/whatever ....
    63  # 
    64  # Complete test results were as follows;
    65  # 
    66  # input          12345
    67  # output         gsub(/regex/,"x",input)
    68  # 
    69  # regex          output
    70  # (^)            x12345
    71  # ($)            12345x
    72  # (^)|($)        x12345x
    73  # ($)|(^)        x12345x
    74  # (2)            1x345
    75  # (^)|2          x1x345
    76  # 2|(^)          x1x345
    77  # ($)|2          1x345
    78  # 2|($)          1x345
    79  # (2)|(^)        x1x345
    80  # (^)|(2)        x1x345
    81  # (2)|($)        1x345
    82  # ($)|(2)        1x345
    83  # .((2)|(^))     x345
    84  # .((^)|(2))     x345
    85  # .((2)|($))     x34x
    86  # .(($)|(2))     x34x
    87  # x{0}((2)|(^))  x1x345
    88  # x{0}((^)|(2))  x1x345
    89  # x{0}((2)|($))  1x345
    90  # x{0}(($)|(2))  1x345
    91  # x*((2)|(^))    x1x345
    92  # x*((^)|(2))    x1x345
    93  # x*((2)|($))    1x345
    94  # x*(($)|(2))    1x345
    95  # 
    96  # Here's the test program I used, a few of the cases use ERE {n[,[m]]}
    97  # operators so that will have to be commented out or have a check
    98  # added or something (should have put a conditional in I know... ;-)
    99  # 
   100  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   101  # 
   102  BEGIN{  # gsub() regression cases for ^ and $ anchors used inside alternations; all work happens in this BEGIN rule
   103  # The string every case starts from.
   104  TESTSTR="12345"
   105  # Preamble: echo the input string and a description of the operation applied.
   106  print "input          "TESTSTR
   107  print "output         gsub(/regex/,\"x\",input)"
   108  print ""
   109  # Column heading, then the cases. Each case resets $0 to TESTSTR, runs gsub with a regex literal on $0, and prints a label padded to 15 columns followed by the resulting $0.
   110  print "regex          output"
   111  $0=TESTSTR
   112  gsub(/(^)/,"x")
   113  print "(^)            "$0
   114  
   115  $0=TESTSTR
   116  gsub(/($)/,"x")
   117  print "($)            "$0
   118  
   119  $0=TESTSTR
   120  gsub(/(^)|($)/,"x")
   121  print "(^)|($)        "$0
   122  
   123  $0=TESTSTR
   124  gsub(/($)|(^)/,"x")
   125  print "($)|(^)        "$0
   126  # Plain literal match. NOTE(review): the label printed is (2) although the regex literal is /2/ without parentheses.
   127  $0=TESTSTR
   128  gsub(/2/,"x")
   129  print "(2)            "$0
   130  # Start anchor alternated with a literal, in both orders.
   131  $0=TESTSTR
   132  gsub(/(^)|2/,"x")
   133  print "(^)|2          "$0
   134  
   135  $0=TESTSTR
   136  gsub(/2|(^)/,"x")
   137  print "2|(^)          "$0
   138  # End anchor alternated with a literal: the header's table records 1x345 from gawk 3.1.0 for this form, while the report gives 1x345x as the expected result for the unparenthesized /$|2/.
   139  $0=TESTSTR
   140  gsub(/($)|2/,"x")
   141  print "($)|2          "$0
   142  
   143  $0=TESTSTR
   144  gsub(/2|($)/,"x")
   145  print "2|($)          "$0
   146  # Same alternations with the literal 2 also wrapped in a group.
   147  $0=TESTSTR
   148  gsub(/(2)|(^)/,"x")
   149  print "(2)|(^)        "$0
   150  
   151  $0=TESTSTR
   152  gsub(/(^)|(2)/,"x")
   153  print "(^)|(2)        "$0
   154  
   155  $0=TESTSTR
   156  gsub(/(2)|($)/,"x")
   157  print "(2)|($)        "$0
   158  
   159  $0=TESTSTR
   160  gsub(/($)|(2)/,"x")
   161  print "($)|(2)        "$0
   162  # Alternation nested in a group after a dot, so one character is consumed before the anchor or the 2 is tried.
   163  $0=TESTSTR
   164  gsub(/.((2)|(^))/,"x")
   165  print ".((2)|(^))     "$0
   166  
   167  $0=TESTSTR
   168  gsub(/.((^)|(2))/,"x")
   169  print ".((^)|(2))     "$0
   170  
   171  $0=TESTSTR
   172  gsub(/.((2)|($))/,"x")
   173  print ".((2)|($))     "$0
   174  
   175  $0=TESTSTR
   176  gsub(/.(($)|(2))/,"x")
   177  print ".(($)|(2))     "$0
   178  # x{0} prefix: these cases use the ERE interval operator, which the header report says may need to be commented out or guarded.
   179  $0=TESTSTR
   180  gsub(/x{0}((2)|(^))/,"x")
   181  print "x{0}((2)|(^))  "$0
   182  
   183  $0=TESTSTR
   184  gsub(/x{0}((^)|(2))/,"x")
   185  print "x{0}((^)|(2))  "$0
   186  
   187  $0=TESTSTR
   188  gsub(/x{0}((2)|($))/,"x")
   189  print "x{0}((2)|($))  "$0
   190  
   191  $0=TESTSTR
   192  gsub(/x{0}(($)|(2))/,"x")
   193  print "x{0}(($)|(2))  "$0
   194  # Same four alternations with an x* prefix in place of x{0}.
   195  $0=TESTSTR
   196  gsub(/x*((2)|(^))/,"x")
   197  print "x*((2)|(^))    "$0
   198  
   199  $0=TESTSTR
   200  gsub(/x*((^)|(2))/,"x")
   201  print "x*((^)|(2))    "$0
   202  
   203  $0=TESTSTR
   204  gsub(/x*((2)|($))/,"x")
   205  print "x*((2)|($))    "$0
   206  
   207  $0=TESTSTR
   208  gsub(/x*(($)|(2))/,"x")
   209  print "x*(($)|(2))    "$0
   210  # Four further cases that do not appear in the header's results table: x{0} placed directly before an anchor, alone and alternated with 2.
   211  $0=TESTSTR
   212  gsub(/x{0}^/,"x")
   213  print "x{0}^          "$0
   214  
   215  $0=TESTSTR
   216  gsub(/x{0}$/,"x")
   217  print "x{0}$          "$0
   218  
   219  $0=TESTSTR
   220  gsub(/(x{0}^)|2/,"x")
   221  print "(x{0}^)|2      "$0
   222  
   223  $0=TESTSTR
   224  gsub(/(x{0}$)|2/,"x")
   225  print "(x{0}$)|2      "$0
   226  
   227  
   228  }
   229  # 
   230  # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   231  # 
   232  # byefrom
   233  # 
   234  # -- 
   235  # laura fairhead  # laura@madonnaweb.com  http://lf.8k.com
   236  #                 # if you are bored crack my sig.
   237  # 1F8B0808CABB793C0000666667002D8E410E83300C04EF91F2877D00CA138A7A
   238  # EAA98F30C494480157B623C4EF1B508FDED1CEFA9152A23DE35D661593C5318E
   239  # 630C313CD701BE92E390563326EE17A3CA818F5266E4C2461547F1F5267659CA
   240  # 8EE2092F76C329ED02CA430C5373CC62FF94BAC6210B36D9F9BC4AB53378D978
   241  # 80F2978A1A6E5D6F5133B67B6113178DC1059526698AFE5C17A5187E7D930492
   242  #