// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package typesniffer

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"regexp"
	"strings"

	"code.gitea.io/gitea/modules/util"
)

// sniffLen is the maximum number of bytes used to determine the content type.
const sniffLen = 1024

const (
	// SvgMimeType is the MIME type of SVG images.
	SvgMimeType = "image/svg+xml"
	// ApplicationOctetStream is the MIME type of binary files.
	ApplicationOctetStream = "application/octet-stream"
)

var (
	// svgComment matches a `<!-- ... -->` comment; the match is non-greedy and
	// `.` also matches newlines.
	svgComment = regexp.MustCompile(`(?s)<!--.*?-->`)
	// svgTagRegex matches, case-insensitively, input that begins with optional
	// whitespace and any number of `<!DOCTYPE svg ...>` declarations followed
	// by an `<svg` tag.
	svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
	// svgTagInXMLRegex is like svgTagRegex but requires the input to start
	// with an `<?xml ... ?>` declaration.
	svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg\b`)
)

// SniffedType contains information about a blob's type.
type SniffedType struct {
	// contentType is the detected content type; it may carry parameters
	// after a ';'.
	contentType string
}

// IsText detects if the content type is a text type, i.e. contains "text/".
39 func (ct SniffedType) IsText() bool { 40 return strings.Contains(ct.contentType, "text/") 41 } 42 43 // IsImage detects if data is an image format 44 func (ct SniffedType) IsImage() bool { 45 return strings.Contains(ct.contentType, "image/") 46 } 47 48 // IsSvgImage detects if data is an SVG image format 49 func (ct SniffedType) IsSvgImage() bool { 50 return strings.Contains(ct.contentType, SvgMimeType) 51 } 52 53 // IsPDF detects if data is a PDF format 54 func (ct SniffedType) IsPDF() bool { 55 return strings.Contains(ct.contentType, "application/pdf") 56 } 57 58 // IsVideo detects if data is an video format 59 func (ct SniffedType) IsVideo() bool { 60 return strings.Contains(ct.contentType, "video/") 61 } 62 63 // IsAudio detects if data is an video format 64 func (ct SniffedType) IsAudio() bool { 65 return strings.Contains(ct.contentType, "audio/") 66 } 67 68 // IsRepresentableAsText returns true if file content can be represented as 69 // plain text or is empty. 70 func (ct SniffedType) IsRepresentableAsText() bool { 71 return ct.IsText() || ct.IsSvgImage() 72 } 73 74 // IsBrowsableType returns whether a non-text type can be displayed in a browser 75 func (ct SniffedType) IsBrowsableBinaryType() bool { 76 return ct.IsImage() || ct.IsSvgImage() || ct.IsPDF() || ct.IsVideo() || ct.IsAudio() 77 } 78 79 // GetMimeType returns the mime type 80 func (ct SniffedType) GetMimeType() string { 81 return strings.SplitN(ct.contentType, ";", 2)[0] 82 } 83 84 // DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty. 
85 func DetectContentType(data []byte) SniffedType { 86 if len(data) == 0 { 87 return SniffedType{"text/unknown"} 88 } 89 90 ct := http.DetectContentType(data) 91 92 if len(data) > sniffLen { 93 data = data[:sniffLen] 94 } 95 96 // SVG is unsupported by http.DetectContentType, https://github.com/golang/go/issues/15888 97 98 detectByHTML := strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html") 99 detectByXML := strings.Contains(ct, "text/xml") 100 if detectByHTML || detectByXML { 101 dataProcessed := svgComment.ReplaceAll(data, nil) 102 dataProcessed = bytes.TrimSpace(dataProcessed) 103 if detectByHTML && svgTagRegex.Match(dataProcessed) || 104 detectByXML && svgTagInXMLRegex.Match(dataProcessed) { 105 ct = SvgMimeType 106 } 107 } 108 109 if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) { 110 // The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg". 111 // So remove the "ID3" prefix and detect again, if result is text, then it must be text content. 112 // This works especially because audio files contain many unprintable/invalid characters like `0x00` 113 ct2 := http.DetectContentType(data[3:]) 114 if strings.HasPrefix(ct2, "text/") { 115 ct = ct2 116 } 117 } 118 119 return SniffedType{ct} 120 } 121 122 // DetectContentTypeFromReader guesses the content type contained in the reader. 123 func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) { 124 buf := make([]byte, sniffLen) 125 n, err := util.ReadAtMost(r, buf) 126 if err != nil { 127 return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err) 128 } 129 buf = buf[:n] 130 131 return DetectContentType(buf), nil 132 }