github.com/keysonZZZ/kmg@v0.0.0-20151121023212-05317bfd7d39/encoding/kmgTextEncoding/Encoding.go (about) 1 package kmgTextEncoding 2 3 import ( 4 "bytes" 5 "io/ioutil" 6 "net/http" 7 "strings" 8 9 "github.com/PuerkitoBio/goquery" 10 "github.com/bronze1man/kmg/kmgNet/kmgHttp" 11 "golang.org/x/text/encoding/japanese" 12 "golang.org/x/text/transform" 13 ) 14 15 type encodingType string 16 17 const ( 18 Utf8 encodingType = "utf-8" 19 ShiftJis encodingType = "shift_jis" 20 ) 21 22 var encodingGuessList []encodingType = []encodingType{ 23 ShiftJis, 24 Utf8, 25 } 26 27 //目前只处理了编码是 shift_jis 时的情况 28 func HttpResponseToUtf8(res *http.Response) (out []byte) { 29 body := kmgHttp.MustResponseReadAllBody(res) 30 for _, encoding := range encodingGuessList { 31 if !isResponseEncodingBy(encoding, res, body) { 32 continue 33 } 34 if encoding == ShiftJis { 35 tReader := transform.NewReader(bytes.NewReader(body), japanese.ShiftJIS.NewDecoder()) 36 var err error 37 out, err = ioutil.ReadAll(tReader) 38 if err != nil { 39 panic(err) 40 } 41 return out 42 } 43 if encoding == Utf8 { 44 return body 45 } 46 } 47 //没猜到,原样返回 48 return body 49 } 50 51 func isResponseEncodingBy(encoding encodingType, res *http.Response, responseBody []byte) bool { 52 contentType := res.Header.Get("Content-Type") 53 charset := getCharsetFromHttpContentType(contentType) 54 if charset == string(encoding) { 55 return true 56 } 57 if charset != "" { 58 return false 59 } 60 dom, err := goquery.NewDocumentFromReader(bytes.NewReader(responseBody)) 61 if err != nil { 62 panic(err) 63 } 64 contentType, ok := dom.Find(`meta[http-equiv="content-type"]`).Eq(0).Attr("content") 65 if !ok { 66 return false 67 } 68 charset = getCharsetFromHttpContentType(contentType) 69 return charset == string(encoding) 70 } 71 72 func getCharsetFromHttpContentType(contentType string) string { 73 list := strings.Split(contentType, "charset=") 74 if len(list) == 1 { 75 return "" 76 } 77 return strings.ToLower(list[len(list)-1]) 78 }