github.com/benhoyt/goawk@v1.8.1/testdata/gawk/wideidx2.awk (about)

     1  # Date: Thu, 27 Apr 2006 20:59:03 +0100
     2  # From: Lee Haywood <ljhaywood2@googlemail.com>
     3  # Subject: gawk multi-byte support bugs, assertion bug and fix.
     4  # To: bug-gawk@gnu.org
     5  # Message-id: <60962be00604271259na0d8fdayb9d0c69a853216e8@mail.gmail.com>
     6  # MIME-version: 1.0
     7  # Content-type: multipart/alternative;
     8  #  boundary="----=_Part_10136_920879.1146167943492"
     9  # Status: RO
    10  # 
    11  # ------=_Part_10136_920879.1146167943492
    12  # Content-Type: text/plain; charset=ISO-8859-1
    13  # Content-Transfer-Encoding: quoted-printable
    14  # Content-Disposition: inline
    15  # 
    16  # 
    17  # Firstly, I have been getting the following error from version 3.1.5.
    18  # 
    19  #     awk: node.c:515: unref: Assertion `(tmp->flags & 4096) != 0' failed.
    20  # 
    21  # In mk_number() in node.c the MBS_SUPPORT code is inside the GAWKDEBUG
    22  # section - moving it outside explicitly clears the string values, which
    23  # prevents the assertion error from occurring.  The corrected version is
    24  # shown at the end of this message.
    25  # 
    26  # As an aside, I also noticed that n->wstptr is not cleared by
    27  # set_field() and set_record() in field.c when the flags are set to
    28  # exclude WSTRCUR.  However, I do not have a test case to show if
    29  # changing them makes any difference.
    30  # 
    31  # A second problem also occurs when gawk 3.1.5 is compiled with
    32  # multi-byte character support (MBS_SUPPORT).  The following code should
    33  # change the index of the substring "bc" from 2 to 3, but it gets
    34  # reported as 2 in both cases - which is obviously disastrous.
    35  # 
    36  #     awk 'BEGIN {
    37  #             Value = "abc"
    38  # 
    39  #             print "Before <" Value "> ",
    40  #                   index( Value, "bc" )
    41  # 
    42  #             sub( /bc/, "bbc", Value )
    43  # 
    44  #             print "After  <" Value ">",
    45  #                   index( Value, "bc" )
    46  #         }'
    47  # 
    48  # Compiling with MBS_SUPPORT undefined makes these problems go away.
    49  # 
    50  # /* mk_number --- allocate a node with defined number */
    51  # 
    52  # NODE *
    53  # mk_number(AWKNUM x, unsigned int flags)
    54  # {
    55  #         register NODE *r;
    56  # 
    57  #         getnode(r);
    58  #         r->type = Node_val;
    59  #         r->numbr = x;
    60  #         r->flags = flags;
    61  # #if defined MBS_SUPPORT
    62  #         r->wstptr = NULL;
    63  #         r->wstlen = 0;
    64  # #endif /* MBS_SUPPORT */
    65  # #ifdef GAWKDEBUG
    66  #         r->stref = 1;
    67  #         r->stptr = NULL;
    68  #         r->stlen = 0;
    69  # #if defined MBS_SUPPORT
    70  #         r->flags &= ~WSTRCUR;
    71  # #endif /* MBS_SUPPORT */
    72  # #endif /* GAWKDEBUG */
    73  #         return r;
    74  # }
    75  # 
    76  # Thanks.
    77  # 
    78  # --
    79  # Lee Haywood.
    80  
    81  BEGIN {  # Regression check for the report above: index() must reflect the string as modified by sub().
    82  	Value = "abc"
    83  
    84  	print "Before <" Value "> ", index( Value, "bc" )  # "bc" starts at position 2 in "abc"
    85   
    86  	sub( /bc/, "bbc", Value )  # rewrites Value in place to "abbc", moving "bc" to position 3
    87  
    88  	print "After  <" Value ">", index( Value, "bc" )  # should report 3; the report says gawk 3.1.5 with MBS_SUPPORT printed 2
    89  }