// Command map is the map step of the word_count example.
//
// Usage: map <inputDir> <outputDir>
//
// It walks every file under inputDir, splits the contents into
// whitespace-delimited tokens, reduces each token to lower-case runs of ASCII
// letters, and tallies how often each resulting word occurs. For every
// distinct word it then writes a file named after the word into outputDir
// whose content is the decimal count followed by a newline.
package main

import (
	"bufio"
	"flag"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strconv"
	"strings"
)

var (
	// reg matches every run of characters that are not ASCII letters. It is
	// compiled at package scope so sanitize is usable before main has run.
	reg = regexp.MustCompile(`[^A-Za-z]+`)

	inputDir  string
	outputDir string
)

// sanitize breaks a raw token into lower-case words made only of ASCII
// letters. Every run of other characters acts as a separator, so "don't"
// yields ["don", "t"] and "Hello,World!" yields ["hello", "world"]. Tokens
// containing no letters yield an empty slice; the result never contains
// empty strings.
func sanitize(word string) []string {
	// Replace before lower-casing: lower-casing first could map some
	// non-ASCII runes (e.g. the Kelvin sign) onto ASCII letters and let
	// them through the filter.
	sanitized := reg.ReplaceAllString(word, " ")
	return strings.Fields(strings.ToLower(sanitized))
}

// countFile scans the file at path and adds the occurrences of each
// sanitized word to counts. It logs the path before scanning and the number
// of whitespace-delimited tokens read afterwards. The file is always closed
// before returning; a close error is reported only when no earlier error
// occurred.
func countFile(path string, counts map[string]int) (err error) {
	log.Printf("scanning %v", path)
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := f.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	scanner := bufio.NewScanner(f)
	scanner.Split(bufio.ScanWords)
	tokens := 0
	for scanner.Scan() {
		tokens++
		for _, word := range sanitize(scanner.Text()) {
			counts[word]++
		}
	}
	if err := scanner.Err(); err != nil {
		return err
	}

	log.Printf("found %d words in %s", tokens, path)
	return nil
}

func main() {
	flag.Parse()
	args := flag.Args()
	if len(args) != 2 {
		log.Fatalf("expect two arguments; got %v", len(args))
	}
	inputDir, outputDir = args[0], args[1]

	wordMap := make(map[string]int)
	walkErr := filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
		// Walk reports stat/readdir failures through err, and info may be
		// nil in that case; bail out instead of dereferencing it or
		// silently producing incomplete counts.
		if err != nil {
			return err
		}
		if info.IsDir() {
			return nil
		}
		return countFile(path, wordMap)
	})
	if walkErr != nil {
		log.Fatal(walkErr)
	}

	// One output file per distinct word, holding its total count.
	for word, count := range wordMap {
		data := []byte(strconv.Itoa(count) + "\n")
		if err := ioutil.WriteFile(filepath.Join(outputDir, word), data, 0644); err != nil {
			log.Fatal(err)
		}
	}
}