# From Gawk Manual modified by bug fix and removal of punctuation
#
# Collects the words found in the input and, at end of input, sends every
# distinct word longer than 10 characters, plus a summary count line, to a
# sort command.

# The invoker may preset SORT to customize the sort command; when SORT is
# unset (or otherwise false) the default command below is used.
BEGIN {
    if (!SORT) {
        SORT = "LC_ALL=C sort"
    }
}

# Record every word which is used at least once.
# Each field is lower-cased, and the first run of lowercase letters and
# hyphens in it (if there is one) becomes a key of seen[].
{
    for (field = 1; field <= NF; field++) {
        lowered = tolower($field)
        # On success match() leaves the start and length of the matched
        # text in RSTART and RLENGTH.
        if (match(lowered, /([[:lower:]]|-)+/) != 0) {
            word = substr(lowered, RSTART, RLENGTH)
            seen[word] = 1
        }
    }
}

# Find the number of distinct words longer than 10 characters.
# Each such word is piped to SORT; the summary line is written to the same
# pipe, so it is sorted together with the words. Closing the pipe lets the
# sort command finish and emit its output.
END {
    long_total = 0
    for (candidate in seen) {
        if (length(candidate) <= 10)
            continue
        long_total++
        print candidate | SORT
    }
    print long_total, "long words" | SORT
    close(SORT)
}