github.com/pachyderm/pachyderm@v1.13.4/examples/word_count/src/map.go (about)

     1  package main
     2  
     3  import (
     4  	"bufio"
     5  	"flag"
     6  	"io/ioutil"
     7  	"log"
     8  	"os"
     9  	"path/filepath"
    10  	"regexp"
    11  	"strconv"
    12  	"strings"
    13  )
    14  
    15  var (
    16  	reg       *regexp.Regexp
    17  	inputDir  string
    18  	outputDir string
    19  )
    20  
    21  func sanitize(word string) []string {
    22  	sanitized := reg.ReplaceAllString(word, " ")
    23  	return strings.Split(strings.ToLower(sanitized), " ")
    24  }
    25  
    26  func main() {
    27  	flag.Parse()
    28  	args := flag.Args()
    29  	if len(args) != 2 {
    30  		log.Fatalf("expect two arguments; got %v", len(args))
    31  	}
    32  
    33  	var err error
    34  	reg, err = regexp.Compile(`[^A-Za-z]+`)
    35  	if err != nil {
    36  		log.Fatal(err)
    37  	}
    38  
    39  	inputDir = args[0]
    40  	outputDir = args[1]
    41  
    42  	wordMap := make(map[string]int)
    43  	if err := filepath.Walk(inputDir, func(path string, info os.FileInfo, _ error) error {
    44  		if info.IsDir() {
    45  			return nil
    46  		}
    47  
    48  		log.Printf("scanning %v", path)
    49  		f, err := os.Open(path)
    50  		if err != nil {
    51  			return err
    52  		}
    53  
    54  		scanner := bufio.NewScanner(f)
    55  		scanner.Split(bufio.ScanWords)
    56  		count := 0
    57  		for scanner.Scan() {
    58  			count += 1
    59  			for _, word := range sanitize(scanner.Text()) {
    60  				if word != "" {
    61  					wordMap[word] = wordMap[word] + 1
    62  				}
    63  			}
    64  		}
    65  
    66  		if err := scanner.Err(); err != nil {
    67  			return err
    68  		}
    69  
    70  		log.Printf("found %d words in %s", count, path)
    71  
    72  		if err := f.Close(); err != nil {
    73  			return err
    74  		}
    75  		return nil
    76  	}); err != nil {
    77  		log.Fatal(err)
    78  	}
    79  
    80  	for word, count := range wordMap {
    81  		if err := ioutil.WriteFile(filepath.Join(outputDir, word), []byte(strconv.Itoa(count)+"\n"), 0644); err != nil {
    82  			log.Fatal(err)
    83  		}
    84  	}
    85  }