code.gitea.io/gitea@v1.22.3/modules/typesniffer/typesniffer_test.go (about)

     1  // Copyright 2021 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package typesniffer
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/base64"
     9  	"encoding/hex"
    10  	"strings"
    11  	"testing"
    12  
    13  	"github.com/stretchr/testify/assert"
    14  )
    15  
    16  func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
    17  	// Pre-condition: Shorter than sniffLen detects SVG.
    18  	assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
    19  	// Longer than sniffLen detects something else.
    20  	assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
    21  }
    22  
    23  func TestIsTextFile(t *testing.T) {
    24  	assert.True(t, DetectContentType([]byte{}).IsText())
    25  	assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText())
    26  }
    27  
    28  func TestIsSvgImage(t *testing.T) {
    29  	assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
    30  	assert.True(t, DetectContentType([]byte("    <svg></svg>")).IsSvgImage())
    31  	assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
    32  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
    33  	assert.True(t, DetectContentType([]byte(`<!-- Comment -->
    34  	<svg></svg>`)).IsSvgImage())
    35  	assert.True(t, DetectContentType([]byte(`<!-- Multiple -->
    36  	<!-- Comments -->
    37  	<svg></svg>`)).IsSvgImage())
    38  	assert.True(t, DetectContentType([]byte(`<!-- Multiline
    39  	Comment -->
    40  	<svg></svg>`)).IsSvgImage())
    41  	assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
    42  	"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
    43  	<svg></svg>`)).IsSvgImage())
    44  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    45  	<!-- Comment -->
    46  	<svg></svg>`)).IsSvgImage())
    47  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    48  	<!-- Multiple -->
    49  	<!-- Comments -->
    50  	<svg></svg>`)).IsSvgImage())
    51  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    52  	<!-- Multline
    53  	Comment -->
    54  	<svg></svg>`)).IsSvgImage())
    55  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    56  	<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
    57  	<!-- Multline
    58  	Comment -->
    59  	<svg></svg>`)).IsSvgImage())
    60  
    61  	// the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
    62  	assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())
    63  
    64  	assert.False(t, DetectContentType([]byte{}).IsSvgImage())
    65  	assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
    66  	assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
    67  	assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage())
    68  	assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage())
    69  	assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage())
    70  	assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment -->
    71  	<foo></foo>`)).IsSvgImage())
    72  	assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    73  	<!-- <svg></svg> inside comment -->
    74  	<foo></foo>`)).IsSvgImage())
    75  
    76  	assert.False(t, DetectContentType([]byte(`
    77  <!-- comment1 -->
    78  <div>
    79  	<!-- comment2 -->
    80  	<svg></svg>
    81  </div>
    82  `)).IsSvgImage())
    83  
    84  	assert.False(t, DetectContentType([]byte(`
    85  <!-- comment1
    86  -->
    87  <div>
    88  	<!-- comment2
    89  -->
    90  	<svg></svg>
    91  </div>
    92  `)).IsSvgImage())
    93  	assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
    94  	assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
    95  }
    96  
    97  func TestIsPDF(t *testing.T) {
    98  	pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
    99  	assert.True(t, DetectContentType(pdf).IsPDF())
   100  	assert.False(t, DetectContentType([]byte("plain text")).IsPDF())
   101  }
   102  
   103  func TestIsVideo(t *testing.T) {
   104  	mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
   105  	assert.True(t, DetectContentType(mp4).IsVideo())
   106  	assert.False(t, DetectContentType([]byte("plain text")).IsVideo())
   107  }
   108  
   109  func TestIsAudio(t *testing.T) {
   110  	mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
   111  	assert.True(t, DetectContentType(mp3).IsAudio())
   112  	assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
   113  
   114  	assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
   115  	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText())          // test ID3 tag for plain text
   116  	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
   117  }
   118  
   119  func TestDetectContentTypeFromReader(t *testing.T) {
   120  	mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
   121  	st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
   122  	assert.NoError(t, err)
   123  	assert.True(t, st.IsAudio())
   124  }
   125  
   126  func TestDetectContentTypeOgg(t *testing.T) {
   127  	oggAudio, _ := hex.DecodeString("4f67675300020000000000000000352f0000000000007dc39163011e01766f72626973000000000244ac0000000000000071020000000000b8014f6767530000")
   128  	st, err := DetectContentTypeFromReader(bytes.NewReader(oggAudio))
   129  	assert.NoError(t, err)
   130  	assert.True(t, st.IsAudio())
   131  
   132  	oggVideo, _ := hex.DecodeString("4f676753000200000000000000007d9747ef000000009b59daf3012a807468656f7261030201001e00110001e000010e00020000001e00000001000001000001")
   133  	st, err = DetectContentTypeFromReader(bytes.NewReader(oggVideo))
   134  	assert.NoError(t, err)
   135  	assert.True(t, st.IsVideo())
   136  }