// Unicode byte order mark (BOM) constants, written as raw byte strings.
//
// Reference:
// https://en.wikipedia.org/wiki/Byte_order_mark
// and https://www.unicode.org/faq/utf_bom.html
//
//nolint:all
const (
	BOM_UTF8               = "\xEF\xBB\xBF"
	BOM_UTF16_BigEndian    = "\xFE\xFF"
	BOM_UTF16_LittleEndian = "\xFF\xFE"
	BOM_UTF32_BigEndian    = "\x00\x00\xFE\xFF"
	BOM_UTF32_LittleEndian = "\xFF\xFE\x00\x00"
)

// DetectBOM reports which byte order mark, if any, b starts with.
//
// The result is one of the BOM_* constants, or "" when b does not begin
// with a recognized mark (including when b is nil or too short).
//
// The UTF-32 little-endian mark begins with the UTF-16 little-endian mark,
// so input starting with FF FE 00 00 is reported as UTF-32 little-endian.
func DetectBOM(b []byte) (bom string) {
	// Candidates are tried in priority order: the 4-byte marks first, then
	// UTF-8, then the 2-byte marks, so a longer mark always wins over a
	// shorter one that is its prefix.
	for _, mark := range [...]string{
		BOM_UTF32_BigEndian,
		BOM_UTF32_LittleEndian,
		BOM_UTF8,
		BOM_UTF16_BigEndian,
		BOM_UTF16_LittleEndian,
	} {
		// Converting inside the comparison lets the compiler skip the copy,
		// and returning the constant avoids allocating a result string.
		if len(b) >= len(mark) && string(b[:len(mark)]) == mark {
			return mark
		}
	}
	return ""
}

// TrimBOM detects and removes a BOM prefix from b.
//
// The returned slice shares its underlying memory with b; when no BOM is
// present, b is returned as is.
func TrimBOM(b []byte) []byte {
	// DetectBOM yields "" when there is nothing to trim, which makes this
	// b[0:], i.e. the same slice.
	return b[len(DetectBOM(b)):]
}

// SkipBOMReader detects and skips a BOM prefix at the start of rd.
//
// The returned io.Reader is a *bufio.Reader wrapping rd, positioned just
// past the BOM when one was found and at the very beginning otherwise.
func SkipBOMReader(rd io.Reader) io.Reader {
	buf := bufio.NewReader(rd)

	// Peek returns whatever bytes are available even when it reports a
	// short read, so one call handles streams shorter than four bytes too.
	// The error is intentionally ignored: this function has no way to
	// report it, and a short prefix simply yields a shorter (or no) match.
	head, _ := buf.Peek(4)

	// The detected mark is a prefix of the already buffered bytes, and
	// Discard cannot fail for a count within the buffered data.
	_, _ = buf.Discard(len(DetectBOM(head)))
	return buf
}