github.com/vertgenlab/gonomics@v1.0.0/fileio/fileio.go (about) 1 // Package fileio provides wrappers of the builtin golang Reader/Writer utilities for ease of use and automatic gzip handling. 2 package fileio 3 4 import ( 5 "bufio" 6 "errors" 7 "fmt" 8 "io" 9 "log" 10 "os" 11 "sort" 12 "strings" 13 14 "github.com/vertgenlab/gonomics/exception" 15 ) 16 17 // MustCreate creates a file with the input name. 18 // Fatal/Panics when appropriate. 19 func MustCreate(filename string) *os.File { 20 if filename == "" { 21 log.Fatalf("Must write to a non-empty filename") 22 } 23 file, err := os.Create(filename) 24 if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrExist) { 25 log.Fatal(err.Error()) 26 } else { 27 exception.PanicOnErr(err) 28 } 29 return file 30 } 31 32 // MustOpen opens the input file. 33 // Fatal/Panics when appropriate. 34 func MustOpen(filename string) *os.File { 35 file, err := os.Open(filename) 36 if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) { 37 log.Fatal(err.Error()) 38 } else { 39 exception.PanicOnErr(err) 40 } 41 return file 42 } 43 44 // MustRemove deletes the input file. 45 // Fatal/Panics when appropriate. 46 func MustRemove(filename string) { 47 err := os.Remove(filename) 48 if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) { 49 log.Fatal(err.Error()) 50 } else { 51 exception.PanicOnErr(err) 52 } 53 } 54 55 // NextLine returns the next line of the file (might be a comment line). 56 // Returns true if the file is done. 57 func NextLine(reader *bufio.Reader) (string, bool) { 58 var line string 59 var err error 60 line, err = reader.ReadString('\n') 61 if err != nil && err != io.EOF { 62 exception.PanicOnErr(err) 63 } 64 if err == io.EOF { 65 if line != "" { 66 log.Panicf("Error: last line of file didn't end with a newline character: %s\n", line) 67 } else { 68 return "", true 69 } 70 } 71 line = strings.TrimSuffix(line, "\n") 72 line = strings.TrimSuffix(line, "\r") 73 return line, false 74 } 75 76 // NextRealLine returns the next line of the file that is not a comment line. 77 // Returns true if the file is done. 78 func NextRealLine(reader *bufio.Reader) (string, bool) { 79 var line string 80 var err error 81 for line, err = reader.ReadString('\n'); err == nil && strings.HasPrefix(line, "#"); line, err = reader.ReadString('\n') { 82 } 83 if err != nil && err != io.EOF { 84 log.Panic(err) 85 } 86 if err == io.EOF { 87 if line != "" { 88 log.Panicf("Error: last line of file didn't end with a newline character: %s\n", line) 89 } else { 90 return "", true 91 } 92 } 93 line = strings.TrimSuffix(line, "\n") 94 line = strings.TrimSuffix(line, "\r") //data generated from Windows OS contains \r\n as a two byte new line character. 95 //Here we trim off trailing carriage returns. Lines without carriage returns are unaffected. 96 return line, false 97 } 98 99 // PeekReal will advance a reader past any lines beginning with '#' and read the first n bytes without advancing the reader. 100 func PeekReal(reader *bufio.Reader, n int) ([]byte, error) { 101 var peek []byte 102 var err error 103 for peek, err = reader.Peek(1); err == nil && peek[0] == '#'; peek, err = reader.Peek(1) { 104 _, err = reader.ReadBytes('\n') // advance reader past comment line 105 if err != nil { 106 return nil, err 107 } 108 } 109 110 if err != nil { 111 return nil, err 112 } else { 113 return peek, err 114 } 115 } 116 117 // ReadHeader will advance a reader past initial lines that begin with '#', 118 // returning a slice of these comments lines and leaving the reader at 119 // the first non-comment line. 120 func ReadHeader(reader *bufio.Reader) ([]string, error) { 121 var peek []byte 122 var peekErr error 123 var header []string 124 var line string 125 for peek, peekErr = reader.Peek(1); peekErr == nil && peek[0] == '#'; peek, peekErr = reader.Peek(1) { 126 line, _ = NextLine(reader) 127 header = append(header, line) 128 } 129 130 if peekErr == io.EOF { 131 return header, nil 132 } 133 return header, peekErr 134 } 135 136 // equal returns true if two input files are identical. 137 func equal(a string, b string, commentsMatter bool) bool { 138 var fileADone, fileBDone = false, false 139 var lineA, lineB string 140 141 fA := MustOpen(a) 142 defer fA.Close() 143 fB := MustOpen(b) 144 defer fB.Close() 145 readerA := bufio.NewReader(fA) 146 readerB := bufio.NewReader(fB) 147 148 for !fileADone && !fileBDone { 149 if commentsMatter { 150 lineA, fileADone = NextLine(readerA) 151 lineB, fileBDone = NextLine(readerB) 152 } else { 153 lineA, fileADone = NextRealLine(readerA) 154 lineB, fileBDone = NextRealLine(readerB) 155 } 156 if lineA != lineB { 157 fmt.Printf("diff\n%s\n%s\n", lineA, lineB) 158 return false 159 } 160 } 161 if !fileADone || !fileBDone { 162 return false 163 } 164 return true 165 } 166 167 // AreEqualIgnoreComments returns true if input files are equal. 168 // This function ignores lines beginning with #. 169 func AreEqualIgnoreComments(a string, b string) bool { 170 return equal(a, b, false) 171 } 172 173 // AreEqual returns true if input files are equal. 174 func AreEqual(a string, b string) bool { 175 return equal(a, b, true) 176 } 177 178 // AreEqualIgnoreOrder returns true if input files contain the same lines, 179 // although the order of the lines does not matter. 180 // This program sorts the two files and compares the contents, so it is not well 181 // suited for large files as the whole contents are read into memory. 182 func AreEqualIgnoreOrder(a string, b string) bool { 183 fileA := Read(a) 184 fileB := Read(b) 185 186 if len(fileA) != len(fileB) { 187 return false 188 } 189 190 sort.Strings(fileA) 191 sort.Strings(fileB) 192 193 for i := range fileA { 194 if fileB[i] != fileA[i] { 195 return false 196 } 197 } 198 199 return true 200 } 201 202 // ReadFileToSingleLineString reads in any file type and returns contents without any \n. 203 func ReadFileToSingleLineString(filename string) string { 204 var catInput string 205 var line string 206 var doneReading bool = false 207 file := EasyOpen(filename) 208 209 for line, doneReading = EasyNextRealLine(file); !doneReading; line, doneReading = EasyNextRealLine(file) { 210 catInput = catInput + line 211 } 212 err := file.Close() 213 exception.PanicOnErr(err) 214 return catInput 215 }