github.com/benhoyt/goawk@v1.8.1/testdata/gawk/numindex.awk (about)

     1  #To: bug-gnu-utils@gnu.org
     2  #cc: arnold@gnu.org
     3  #Subject: Possible bug in GNU Awk 3.0.4
     4  #Date: Wed, 24 Nov 1999 21:47:24 +0000
     5  #From: Daniel Elphick <de397@ecs.soton.ac.uk>
     6  #Message-Id: <E11qkG4-0000l0-00@cameron>
     7  #
     8  #This is a multipart MIME message.
     9  #
    10  #--==_Exmh_-11192982200
    11  #Content-Type: text/plain; charset=us-ascii
    12  #
    13  #
    14  #When I use the attached awk script unique on the attached data file, it 
    15  #reports that all 4 lines of the data are the same. Using mawk it correctly 
    16  #reports that there are no repeats.
    17  #
    18  #I don't know if there are limits on the size of associative array keys for the 
    19  #purposes of reliable indexing but if there is then it is not (obviously) 
    20  #documented.
    21  #
    22  #
    23  #--==_Exmh_-11192982200
    24  #Content-Type: text/plain ; name="data"; charset=us-ascii
    25  #Content-Description: data
    26  #Content-Disposition: attachment; filename="data"
    27  #
    28  #322322111111112232231111
    29  #322322111111112213223111
    30  #322322111111112211132231
    31  #322322111111112211113223
    32  #
    33  #--==_Exmh_-11192982200
    34  #Content-Type: text/plain ; name="unique"; charset=us-ascii
    35  #Content-Description: unique
    36  #Content-Disposition: attachment; filename="unique"
    37  #
    38  {
    39  	# a[] maps the text of each distinct record to the NR where it first appeared.
    40  	count++
    41  	if (!($0 in a)) {
    42  		# First sighting of this text: remember its record number.
    43  		a[$0] = NR
    44  	} else {
    45  		# Same text was stored earlier: report the record that introduced it.
    46  		printf("line %d has been seen before at line %d\n", NR, a[$0])
    47  		repeat_count++
    48  	}
    49  }
    50  END {
    51  	# Summary: number of repeated records and their percentage of all records; prints 0% when no records were read, since count is then 0 and dividing by it is an error.
    52  	printf("%d %f%%\n", repeat_count, count ? repeat_count / count * 100 : 0)
    53  }
    54  #
    55  #--==_Exmh_-11192982200--