code.gitea.io/gitea@v1.22.3/modules/typesniffer/typesniffer_test.go (about) 1 // Copyright 2021 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package typesniffer 5 6 import ( 7 "bytes" 8 "encoding/base64" 9 "encoding/hex" 10 "strings" 11 "testing" 12 13 "github.com/stretchr/testify/assert" 14 ) 15 16 func TestDetectContentTypeLongerThanSniffLen(t *testing.T) { 17 // Pre-condition: Shorter than sniffLen detects SVG. 18 assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType) 19 // Longer than sniffLen detects something else. 20 assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType) 21 } 22 23 func TestIsTextFile(t *testing.T) { 24 assert.True(t, DetectContentType([]byte{}).IsText()) 25 assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText()) 26 } 27 28 func TestIsSvgImage(t *testing.T) { 29 assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage()) 30 assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage()) 31 assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage()) 32 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage()) 33 assert.True(t, DetectContentType([]byte(`<!-- Comment --> 34 <svg></svg>`)).IsSvgImage()) 35 assert.True(t, DetectContentType([]byte(`<!-- Multiple --> 36 <!-- Comments --> 37 <svg></svg>`)).IsSvgImage()) 38 assert.True(t, DetectContentType([]byte(`<!-- Multiline 39 Comment --> 40 <svg></svg>`)).IsSvgImage()) 41 assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN" 42 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd"> 43 <svg></svg>`)).IsSvgImage()) 44 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 45 <!-- Comment --> 46 <svg></svg>`)).IsSvgImage()) 47 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 48 <!-- Multiple --> 49 <!-- Comments --> 50 <svg></svg>`)).IsSvgImage()) 51 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 52 <!-- Multline 53 Comment --> 54 <svg></svg>`)).IsSvgImage()) 55 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 56 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> 57 <!-- Multline 58 Comment --> 59 <svg></svg>`)).IsSvgImage()) 60 61 // the DetectContentType should work for incomplete data, because only beginning bytes are used for detection 62 assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage()) 63 64 assert.False(t, DetectContentType([]byte{}).IsSvgImage()) 65 assert.False(t, DetectContentType([]byte("svg")).IsSvgImage()) 66 assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage()) 67 assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage()) 68 assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage()) 69 assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage()) 70 assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment --> 71 <foo></foo>`)).IsSvgImage()) 72 assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 73 <!-- <svg></svg> inside comment --> 74 <foo></foo>`)).IsSvgImage()) 75 76 assert.False(t, DetectContentType([]byte(` 77 <!-- comment1 --> 78 <div> 79 <!-- comment2 --> 80 <svg></svg> 81 </div> 82 `)).IsSvgImage()) 83 84 assert.False(t, DetectContentType([]byte(` 85 <!-- comment1 86 --> 87 <div> 88 <!-- comment2 89 --> 90 <svg></svg> 91 </div> 92 `)).IsSvgImage()) 93 assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage()) 94 assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage()) 95 } 96 97 func TestIsPDF(t *testing.T) { 98 pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe") 99 assert.True(t, DetectContentType(pdf).IsPDF()) 100 assert.False(t, DetectContentType([]byte("plain text")).IsPDF()) 101 } 102 103 func TestIsVideo(t *testing.T) { 104 mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA") 105 assert.True(t, DetectContentType(mp4).IsVideo()) 106 assert.False(t, DetectContentType([]byte("plain text")).IsVideo()) 107 } 108 109 func TestIsAudio(t *testing.T) { 110 mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") 111 assert.True(t, DetectContentType(mp3).IsAudio()) 112 assert.False(t, DetectContentType([]byte("plain text")).IsAudio()) 113 114 assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio()) 115 assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text 116 assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char 117 } 118 119 func TestDetectContentTypeFromReader(t *testing.T) { 120 mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") 121 st, err := DetectContentTypeFromReader(bytes.NewReader(mp3)) 122 assert.NoError(t, err) 123 assert.True(t, st.IsAudio()) 124 } 125 126 func TestDetectContentTypeOgg(t *testing.T) { 127 oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000") 128 st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio)) 129 assert.NoError(t, err) 130 assert.True(t, st.IsAudio()) 131 132 oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001") 133 st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo)) 134 assert.NoError(t, err) 135 assert.True(t, st.IsVideo()) 136 }