github.com/jxskiss/gopkg/v2@v2.14.9-0.20240514120614-899f3e7952b4/utils/strutil/bom.go (about)

     1  package strutil
     2  
     3  import (
     4  	"bufio"
     5  	"io"
     6  )
     7  
     8  // Unicode byte order mark (BOM) constants.
     9  //
    10  // Reference:
    11  // https://en.wikipedia.org/wiki/Byte_order_mark
    12  // and https://www.unicode.org/faq/utf_bom.html
    13  //
    14  //nolint:all
    15  const (
    16  	BOM_UTF8               = "\xEF\xBB\xBF"
    17  	BOM_UTF16_BigEndian    = "\xFE\xFF"
    18  	BOM_UTF16_LittleEndian = "\xFF\xFE"
    19  	BOM_UTF32_BigEndian    = "\x00\x00\xFE\xFF"
    20  	BOM_UTF32_LittleEndian = "\xFF\xFE\x00\x00"
    21  )
    22  
    23  // DetectBOM detects BOM prefix from a byte slice.
    24  func DetectBOM(b []byte) (bom string) {
    25  	if len(b) >= 4 {
    26  		first4 := string(b[:4])
    27  		if first4 == BOM_UTF32_BigEndian || first4 == BOM_UTF32_LittleEndian {
    28  			return first4
    29  		}
    30  	}
    31  	if len(b) >= 3 {
    32  		first3 := string(b[:3])
    33  		if first3 == BOM_UTF8 {
    34  			return first3
    35  		}
    36  	}
    37  	if len(b) >= 2 {
    38  		first2 := string(b[:2])
    39  		if first2 == BOM_UTF16_BigEndian || first2 == BOM_UTF16_LittleEndian {
    40  			return first2
    41  		}
    42  	}
    43  	return ""
    44  }
    45  
    46  // TrimBOM detects and trims BOM prefix from a byte slice, the returned
    47  // byte slice shares the same underlying memory with the given slice.
    48  func TrimBOM(b []byte) []byte {
    49  	bom := DetectBOM(b)
    50  	if bom == "" {
    51  		return b
    52  	}
    53  	return b[len(bom):]
    54  }
    55  
    56  // SkipBOMReader detects and skips BOM prefix from the given io.Reader.
    57  // It returns a *bufio.Reader.
    58  func SkipBOMReader(rd io.Reader) io.Reader {
    59  	buf := bufio.NewReader(rd)
    60  	first, err := buf.Peek(4)
    61  	if err != nil {
    62  		first, err = buf.Peek(3)
    63  		if err != nil {
    64  			first, err = buf.Peek(2)
    65  			if err != nil { // not enough data
    66  				return buf
    67  			}
    68  		}
    69  	}
    70  	bom := DetectBOM(first)
    71  	_, _ = buf.Discard(len(bom))
    72  	return buf
    73  }