github.com/andrewrech/ih-abstract@v0.0.0-20210322142951-2fec1c8d0f38/unique.go (about) 1 package main 2 3 import ( 4 "log" 5 "os" 6 "strings" 7 ) 8 9 // prevUnq adds previously identified unique strings from an existing output file to a hash map. 10 func prevUnq(f string) (r *Records) { 11 var records Records 12 records.Store = make(Store) 13 14 r = &records 15 16 if _, err := os.Stat(f); err == nil { 17 log.Println("reading patterns from existing records file", f) 18 r = Existing(&f) 19 } else { 20 log.Println("existing records file", f, "does not exist, skipping diff") 21 } 22 23 return r 24 } 25 26 // DiffUnq identifies unique strings from an input stream and compares the unique strings to an existing output file. The function returns 1) unique strings and 2) new strings compared to the existing output file. 27 func DiffUnq(in chan []string, name string) (channels map[string](chan []string), done chan struct{}) { 28 done = make(chan struct{}) 29 30 var buf int64 = 1e7 31 32 // channels contains communication of rows 33 // between goroutines processing data 34 channels = make(map[string](chan []string)) 35 36 // add to an existing records map if 37 // if CSV output already exists 38 unqRecordsName := strings.Join([]string{name, "-unique-strings"}, "") 39 unqRecordsNameNew := strings.Join([]string{name, "-unique-strings-new"}, "") 40 41 channels[unqRecordsName] = make(chan []string, buf) 42 43 channels[unqRecordsNameNew] = make(chan []string, buf) 44 45 // read previous output 46 f := strings.Join([]string{name, "-unique-strings.csv"}, "") 47 prevResults := prevUnq(f) 48 49 var records Records 50 records.Store = make(Store) 51 currentResults := &records 52 53 go func() { 54 for l := range in { // for each slice 55 for _, s := range l { // each string of slice 56 57 i := []string{s} 58 59 existsCurrent, err := currentResults.Check(&i) 60 if err != nil { 61 log.Fatalln(err) 62 } 63 64 // string does not exist in current records 65 if !existsCurrent { 66 err = currentResults.Add(&i) 67 if err != nil { 68 log.Fatalln(err) 69 } 70 71 channels[unqRecordsName] <- []string{s} 72 } 73 74 // string does not exist in previous records 75 existsPrev, err := prevResults.Check(&i) 76 if err != nil { 77 log.Fatalln(err) 78 } 79 80 if !existsPrev { 81 err = prevResults.Add(&i) 82 if err != nil { 83 log.Fatalln(err) 84 } 85 86 log.Println("New string:", s) 87 channels[unqRecordsNameNew] <- []string{s} 88 } 89 } 90 } 91 92 close(channels[unqRecordsName]) 93 close(channels[unqRecordsNameNew]) 94 done <- struct{}{} 95 }() 96 97 return channels, done 98 }