github.com/jxskiss/gopkg@v0.17.3/strutil/bom.go (about)

     1  package strutil
     2  
     3  import (
     4  	"bufio"
     5  	"io"
     6  )
     7  
// Unicode byte order mark (BOM) constants.
//
// Each constant holds the raw bytes of one BOM as a Go string (the
// values are byte sequences, not UTF-8 encoded text).
//
// Reference:
// https://en.wikipedia.org/wiki/Byte_order_mark
// and https://www.unicode.org/faq/utf_bom.html
const (
	// BOM_UTF8 is the 3-byte UTF-8 mark.
	BOM_UTF8               = "\xEF\xBB\xBF"
	// BOM_UTF16_BigEndian is the 2-byte UTF-16 big-endian mark.
	BOM_UTF16_BigEndian    = "\xFE\xFF"
	// BOM_UTF16_LittleEndian is the 2-byte UTF-16 little-endian mark.
	// It is also the first two bytes of BOM_UTF32_LittleEndian.
	BOM_UTF16_LittleEndian = "\xFF\xFE"
	// BOM_UTF32_BigEndian is the 4-byte UTF-32 big-endian mark.
	BOM_UTF32_BigEndian    = "\x00\x00\xFE\xFF"
	// BOM_UTF32_LittleEndian is the 4-byte UTF-32 little-endian mark.
	BOM_UTF32_LittleEndian = "\xFF\xFE\x00\x00"
)
    20  
    21  // DetectBOM detects BOM prefix from a byte slice.
    22  func DetectBOM(b []byte) (bom string) {
    23  	if len(b) >= 4 {
    24  		first4 := string(b[:4])
    25  		if first4 == BOM_UTF32_BigEndian || first4 == BOM_UTF32_LittleEndian {
    26  			return first4
    27  		}
    28  	}
    29  	if len(b) >= 3 {
    30  		first3 := string(b[:3])
    31  		if first3 == BOM_UTF8 {
    32  			return first3
    33  		}
    34  	}
    35  	if len(b) >= 2 {
    36  		first2 := string(b[:2])
    37  		if first2 == BOM_UTF16_BigEndian || first2 == BOM_UTF16_LittleEndian {
    38  			return first2
    39  		}
    40  	}
    41  	return ""
    42  }
    43  
    44  // TrimBOM detects and trims BOM prefix from a byte slice, the returned
    45  // byte slice shares the same underlying memory with the given slice.
    46  func TrimBOM(b []byte) []byte {
    47  	bom := DetectBOM(b)
    48  	if bom == "" {
    49  		return b
    50  	}
    51  	return b[len(bom):]
    52  }
    53  
    54  // SkipBOMReader detects and skips BOM prefix from the given io.Reader.
    55  // It returns a *bufio.Reader.
    56  func SkipBOMReader(rd io.Reader) io.Reader {
    57  	buf := bufio.NewReader(rd)
    58  	first, err := buf.Peek(4)
    59  	if err != nil {
    60  		first, err = buf.Peek(3)
    61  		if err != nil {
    62  			first, err = buf.Peek(2)
    63  			if err != nil { // not enough data
    64  				return buf
    65  			}
    66  		}
    67  	}
    68  	bom := DetectBOM(first)
    69  	_, _ = buf.Discard(len(bom))
    70  	return buf
    71  }