github.com/jxskiss/gopkg@v0.17.3/strutil/bom.go (about) 1 package strutil 2 3 import ( 4 "bufio" 5 "io" 6 ) 7 8 // Unicode byte order mark (BOM) constants. 9 // 10 // Reference: 11 // https://en.wikipedia.org/wiki/Byte_order_mark 12 // and https://www.unicode.org/faq/utf_bom.html 13 const ( 14 BOM_UTF8 = "\xEF\xBB\xBF" 15 BOM_UTF16_BigEndian = "\xFE\xFF" 16 BOM_UTF16_LittleEndian = "\xFF\xFE" 17 BOM_UTF32_BigEndian = "\x00\x00\xFE\xFF" 18 BOM_UTF32_LittleEndian = "\xFF\xFE\x00\x00" 19 ) 20 21 // DetectBOM detects BOM prefix from a byte slice. 22 func DetectBOM(b []byte) (bom string) { 23 if len(b) >= 4 { 24 first4 := string(b[:4]) 25 if first4 == BOM_UTF32_BigEndian || first4 == BOM_UTF32_LittleEndian { 26 return first4 27 } 28 } 29 if len(b) >= 3 { 30 first3 := string(b[:3]) 31 if first3 == BOM_UTF8 { 32 return first3 33 } 34 } 35 if len(b) >= 2 { 36 first2 := string(b[:2]) 37 if first2 == BOM_UTF16_BigEndian || first2 == BOM_UTF16_LittleEndian { 38 return first2 39 } 40 } 41 return "" 42 } 43 44 // TrimBOM detects and trims BOM prefix from a byte slice, the returned 45 // byte slice shares the same underlying memory with the given slice. 46 func TrimBOM(b []byte) []byte { 47 bom := DetectBOM(b) 48 if bom == "" { 49 return b 50 } 51 return b[len(bom):] 52 } 53 54 // SkipBOMReader detects and skips BOM prefix from the given io.Reader. 55 // It returns a *bufio.Reader. 56 func SkipBOMReader(rd io.Reader) io.Reader { 57 buf := bufio.NewReader(rd) 58 first, err := buf.Peek(4) 59 if err != nil { 60 first, err = buf.Peek(3) 61 if err != nil { 62 first, err = buf.Peek(2) 63 if err != nil { // not enough data 64 return buf 65 } 66 } 67 } 68 bom := DetectBOM(first) 69 _, _ = buf.Discard(len(bom)) 70 return buf 71 }