code.gitea.io/gitea@v1.19.3/modules/typesniffer/typesniffer_test.go (about) 1 // Copyright 2021 The Gitea Authors. All rights reserved. 2 // SPDX-License-Identifier: MIT 3 4 package typesniffer 5 6 import ( 7 "bytes" 8 "encoding/base64" 9 "strings" 10 "testing" 11 12 "github.com/stretchr/testify/assert" 13 ) 14 15 func TestDetectContentTypeLongerThanSniffLen(t *testing.T) { 16 // Pre-condition: Shorter than sniffLen detects SVG. 17 assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType) 18 // Longer than sniffLen detects something else. 19 assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType) 20 } 21 22 func TestIsTextFile(t *testing.T) { 23 assert.True(t, DetectContentType([]byte{}).IsText()) 24 assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText()) 25 } 26 27 func TestIsSvgImage(t *testing.T) { 28 assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage()) 29 assert.True(t, DetectContentType([]byte(" <svg></svg>")).IsSvgImage()) 30 assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage()) 31 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage()) 32 assert.True(t, DetectContentType([]byte(`<!-- Comment --> 33 <svg></svg>`)).IsSvgImage()) 34 assert.True(t, DetectContentType([]byte(`<!-- Multiple --> 35 <!-- Comments --> 36 <svg></svg>`)).IsSvgImage()) 37 assert.True(t, DetectContentType([]byte(`<!-- Multiline 38 Comment --> 39 <svg></svg>`)).IsSvgImage()) 40 assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN" 41 "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd"> 42 <svg></svg>`)).IsSvgImage()) 43 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 44 <!-- Comment --> 45 <svg></svg>`)).IsSvgImage()) 46 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 47 <!-- Multiple --> 48 <!-- Comments --> 49 <svg></svg>`)).IsSvgImage()) 50 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 51 <!-- Multline 52 Comment --> 53 <svg></svg>`)).IsSvgImage()) 54 assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 55 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"> 56 <!-- Multline 57 Comment --> 58 <svg></svg>`)).IsSvgImage()) 59 60 // the DetectContentType should work for incomplete data, because only beginning bytes are used for detection 61 assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage()) 62 63 assert.False(t, DetectContentType([]byte{}).IsSvgImage()) 64 assert.False(t, DetectContentType([]byte("svg")).IsSvgImage()) 65 assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage()) 66 assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage()) 67 assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage()) 68 assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage()) 69 assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment --> 70 <foo></foo>`)).IsSvgImage()) 71 assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?> 72 <!-- <svg></svg> inside comment --> 73 <foo></foo>`)).IsSvgImage()) 74 75 assert.False(t, DetectContentType([]byte(` 76 <!-- comment1 --> 77 <div> 78 <!-- comment2 --> 79 <svg></svg> 80 </div> 81 `)).IsSvgImage()) 82 83 assert.False(t, DetectContentType([]byte(` 84 <!-- comment1 85 --> 86 <div> 87 <!-- comment2 88 --> 89 <svg></svg> 90 </div> 91 `)).IsSvgImage()) 92 assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage()) 93 assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage()) 94 } 95 96 func TestIsPDF(t *testing.T) { 97 pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe") 98 assert.True(t, DetectContentType(pdf).IsPDF()) 99 assert.False(t, DetectContentType([]byte("plain text")).IsPDF()) 100 } 101 102 func TestIsVideo(t *testing.T) { 103 mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA") 104 assert.True(t, DetectContentType(mp4).IsVideo()) 105 assert.False(t, DetectContentType([]byte("plain text")).IsVideo()) 106 } 107 108 func TestIsAudio(t *testing.T) { 109 mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") 110 assert.True(t, DetectContentType(mp3).IsAudio()) 111 assert.False(t, DetectContentType([]byte("plain text")).IsAudio()) 112 113 assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio()) 114 assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text 115 assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char 116 } 117 118 func TestDetectContentTypeFromReader(t *testing.T) { 119 mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl") 120 st, err := DetectContentTypeFromReader(bytes.NewReader(mp3)) 121 assert.NoError(t, err) 122 assert.True(t, st.IsAudio()) 123 }