github.com/benhoyt/goawk@v1.8.1/testdata/gawk/longwrds.awk (about)

     1  # From Gawk Manual modified by bug fix and removal of punctuation
     2  
     3  # Invoker can customize sort command if necessary.
     4  BEGIN {
     5  	if (!SORT) SORT = "LC_ALL=C sort"
     6  }
     7  
     8  # Record every word which is used at least once
     9  {
    10  	for (i = 1; i <= NF; i++) {
    11  		tmp = tolower($i)
    12  		if (0 != (pos = match(tmp, /([[:lower:]]|-)+/)))
    13  			used[substr(tmp, pos, RLENGTH)] = 1
    14  	}
    15  }
    16  
    17  #Find a number of distinct words longer than 10 characters
    18  END {
    19  	num_long_words = 0
    20  	for (x in used) 
    21  		if (length(x) > 10) {
    22  			++num_long_words
    23  			print x | SORT
    24  		}
    25  	print(num_long_words, "long words") | SORT
    26  	close(SORT)
    27  }