# Source: github.com/benhoyt/goawk@v1.8.1/testdata/gawk/numindex.awk
#
#To: bug-gnu-utils@gnu.org
#cc: arnold@gnu.org
#Subject: Possible bug in GNU Awk 3.0.4
#Date: Wed, 24 Nov 1999 21:47:24 +0000
#From: Daniel Elphick <de397@ecs.soton.ac.uk>
#Message-Id: <E11qkG4-0000l0-00@cameron>
#
#This is a multipart MIME message.
#
#--==_Exmh_-11192982200
#Content-Type: text/plain; charset=us-ascii
#
#
#When I use the attached awk script unique on the attached data file, it
#reports that all 4 lines of the data are the same. Using mawk it correctly
#reports that there are no repeats.
#
#I don't know if there are limits on the size of associative array keys for the
#purposes of reliable indexing but if there is then it is not (obviously)
#documented.
#
#
#--==_Exmh_-11192982200
#Content-Type: text/plain ; name="data"; charset=us-ascii
#Content-Description: data
#Content-Disposition: attachment; filename="data"
#
#322322111111112232231111
#322322111111112213223111
#322322111111112211132231
#322322111111112211113223
#
#--==_Exmh_-11192982200
#Content-Type: text/plain ; name="unique"; charset=us-ascii
#Content-Description: unique
#Content-Disposition: attachment; filename="unique"
#
# Main rule: report every input line whose full text duplicates an earlier
# line, and keep running totals for the summary printed in END.
#
#   a[line]       NR of the first occurrence of each distinct line
#   repeat_count  number of lines that were already present in a[]
#   count         total number of lines read
{
	# `in` tests membership without creating a[$0] as a side effect.
	if ($0 in a)
	{
		printf("line %d has been seen before at line %d\n", NR, a[$0])
		repeat_count += 1
	}
	else
	{
		a[$0] = NR
	}
	count += 1
}
# Summary: number of repeated lines and the percentage of all lines they
# make up.
END {
	# No cast is needed for the ratio: awk arithmetic is floating point.
	# With empty input, count is still unset (0); dividing by it is an
	# error in awk implementations such as gawk, so report 0% instead.
	percent = (count > 0) ? repeat_count / count * 100 : 0
	printf("%d %f%%\n", repeat_count, percent)
}
#
#--==_Exmh_-11192982200--