github.com/vertgenlab/gonomics@v1.0.0/fileio/fileio.go (about)

     1  // Package fileio provides wrappers of the builtin golang Reader/Writer utilities for ease of use and automatic gzip handling.
     2  package fileio
     3  
     4  import (
     5  	"bufio"
     6  	"errors"
     7  	"fmt"
     8  	"io"
     9  	"log"
    10  	"os"
    11  	"sort"
    12  	"strings"
    13  
    14  	"github.com/vertgenlab/gonomics/exception"
    15  )
    16  
    17  // MustCreate creates a file with the input name.
    18  // Fatal/Panics when appropriate.
    19  func MustCreate(filename string) *os.File {
    20  	if filename == "" {
    21  		log.Fatalf("Must write to a non-empty filename")
    22  	}
    23  	file, err := os.Create(filename)
    24  	if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrExist) {
    25  		log.Fatal(err.Error())
    26  	} else {
    27  		exception.PanicOnErr(err)
    28  	}
    29  	return file
    30  }
    31  
    32  // MustOpen opens the input file.
    33  // Fatal/Panics when appropriate.
    34  func MustOpen(filename string) *os.File {
    35  	file, err := os.Open(filename)
    36  	if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
    37  		log.Fatal(err.Error())
    38  	} else {
    39  		exception.PanicOnErr(err)
    40  	}
    41  	return file
    42  }
    43  
    44  // MustRemove deletes the input file.
    45  // Fatal/Panics when appropriate.
    46  func MustRemove(filename string) {
    47  	err := os.Remove(filename)
    48  	if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) {
    49  		log.Fatal(err.Error())
    50  	} else {
    51  		exception.PanicOnErr(err)
    52  	}
    53  }
    54  
    55  // NextLine returns the next line of the file (might be a comment line).
    56  // Returns true if the file is done.
    57  func NextLine(reader *bufio.Reader) (string, bool) {
    58  	var line string
    59  	var err error
    60  	line, err = reader.ReadString('\n')
    61  	if err != nil && err != io.EOF {
    62  		exception.PanicOnErr(err)
    63  	}
    64  	if err == io.EOF {
    65  		if line != "" {
    66  			log.Panicf("Error: last line of file didn't end with a newline character: %s\n", line)
    67  		} else {
    68  			return "", true
    69  		}
    70  	}
    71  	line = strings.TrimSuffix(line, "\n")
    72  	line = strings.TrimSuffix(line, "\r")
    73  	return line, false
    74  }
    75  
    76  // NextRealLine returns the next line of the file that is not a comment line.
    77  // Returns true if the file is done.
    78  func NextRealLine(reader *bufio.Reader) (string, bool) {
    79  	var line string
    80  	var err error
    81  	for line, err = reader.ReadString('\n'); err == nil && strings.HasPrefix(line, "#"); line, err = reader.ReadString('\n') {
    82  	}
    83  	if err != nil && err != io.EOF {
    84  		log.Panic(err)
    85  	}
    86  	if err == io.EOF {
    87  		if line != "" {
    88  			log.Panicf("Error: last line of file didn't end with a newline character: %s\n", line)
    89  		} else {
    90  			return "", true
    91  		}
    92  	}
    93  	line = strings.TrimSuffix(line, "\n")
    94  	line = strings.TrimSuffix(line, "\r") //data generated from Windows OS contains \r\n as a two byte new line character.
    95  	//Here we trim off trailing carriage returns. Lines without carriage returns are unaffected.
    96  	return line, false
    97  }
    98  
    99  // PeekReal will advance a reader past any lines beginning with '#' and read the first n bytes without advancing the reader.
   100  func PeekReal(reader *bufio.Reader, n int) ([]byte, error) {
   101  	var peek []byte
   102  	var err error
   103  	for peek, err = reader.Peek(1); err == nil && peek[0] == '#'; peek, err = reader.Peek(1) {
   104  		_, err = reader.ReadBytes('\n') // advance reader past comment line
   105  		if err != nil {
   106  			return nil, err
   107  		}
   108  	}
   109  
   110  	if err != nil {
   111  		return nil, err
   112  	} else {
   113  		return peek, err
   114  	}
   115  }
   116  
   117  // ReadHeader will advance a reader past initial lines that begin with '#',
   118  // returning a slice of these comments lines and leaving the reader at
   119  // the first non-comment line.
   120  func ReadHeader(reader *bufio.Reader) ([]string, error) {
   121  	var peek []byte
   122  	var peekErr error
   123  	var header []string
   124  	var line string
   125  	for peek, peekErr = reader.Peek(1); peekErr == nil && peek[0] == '#'; peek, peekErr = reader.Peek(1) {
   126  		line, _ = NextLine(reader)
   127  		header = append(header, line)
   128  	}
   129  
   130  	if peekErr == io.EOF {
   131  		return header, nil
   132  	}
   133  	return header, peekErr
   134  }
   135  
   136  // equal returns true if two input files are identical.
   137  func equal(a string, b string, commentsMatter bool) bool {
   138  	var fileADone, fileBDone = false, false
   139  	var lineA, lineB string
   140  
   141  	fA := MustOpen(a)
   142  	defer fA.Close()
   143  	fB := MustOpen(b)
   144  	defer fB.Close()
   145  	readerA := bufio.NewReader(fA)
   146  	readerB := bufio.NewReader(fB)
   147  
   148  	for !fileADone && !fileBDone {
   149  		if commentsMatter {
   150  			lineA, fileADone = NextLine(readerA)
   151  			lineB, fileBDone = NextLine(readerB)
   152  		} else {
   153  			lineA, fileADone = NextRealLine(readerA)
   154  			lineB, fileBDone = NextRealLine(readerB)
   155  		}
   156  		if lineA != lineB {
   157  			fmt.Printf("diff\n%s\n%s\n", lineA, lineB)
   158  			return false
   159  		}
   160  	}
   161  	if !fileADone || !fileBDone {
   162  		return false
   163  	}
   164  	return true
   165  }
   166  
   167  // AreEqualIgnoreComments returns true if input files are equal.
   168  // This function ignores lines beginning with #.
   169  func AreEqualIgnoreComments(a string, b string) bool {
   170  	return equal(a, b, false)
   171  }
   172  
   173  // AreEqual returns true if input files are equal.
   174  func AreEqual(a string, b string) bool {
   175  	return equal(a, b, true)
   176  }
   177  
   178  // AreEqualIgnoreOrder returns true if input files contain the same lines,
   179  // although the order of the lines does not matter.
   180  // This program sorts the two files and compares the contents, so it is not well
   181  // suited for large files as the whole contents are read into memory.
   182  func AreEqualIgnoreOrder(a string, b string) bool {
   183  	fileA := Read(a)
   184  	fileB := Read(b)
   185  
   186  	if len(fileA) != len(fileB) {
   187  		return false
   188  	}
   189  
   190  	sort.Strings(fileA)
   191  	sort.Strings(fileB)
   192  
   193  	for i := range fileA {
   194  		if fileB[i] != fileA[i] {
   195  			return false
   196  		}
   197  	}
   198  
   199  	return true
   200  }
   201  
   202  // ReadFileToSingleLineString reads in any file type and returns contents without any \n.
   203  func ReadFileToSingleLineString(filename string) string {
   204  	var catInput string
   205  	var line string
   206  	var doneReading bool = false
   207  	file := EasyOpen(filename)
   208  
   209  	for line, doneReading = EasyNextRealLine(file); !doneReading; line, doneReading = EasyNextRealLine(file) {
   210  		catInput = catInput + line
   211  	}
   212  	err := file.Close()
   213  	exception.PanicOnErr(err)
   214  	return catInput
   215  }