code.gitea.io/gitea@v1.19.3/modules/typesniffer/typesniffer_test.go (about)

     1  // Copyright 2021 The Gitea Authors. All rights reserved.
     2  // SPDX-License-Identifier: MIT
     3  
     4  package typesniffer
     5  
     6  import (
     7  	"bytes"
     8  	"encoding/base64"
     9  	"strings"
    10  	"testing"
    11  
    12  	"github.com/stretchr/testify/assert"
    13  )
    14  
    15  func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
    16  	// Pre-condition: Shorter than sniffLen detects SVG.
    17  	assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
    18  	// Longer than sniffLen detects something else.
    19  	assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
    20  }
    21  
    22  func TestIsTextFile(t *testing.T) {
    23  	assert.True(t, DetectContentType([]byte{}).IsText())
    24  	assert.True(t, DetectContentType([]byte("lorem ipsum")).IsText())
    25  }
    26  
    27  func TestIsSvgImage(t *testing.T) {
    28  	assert.True(t, DetectContentType([]byte("<svg></svg>")).IsSvgImage())
    29  	assert.True(t, DetectContentType([]byte("    <svg></svg>")).IsSvgImage())
    30  	assert.True(t, DetectContentType([]byte(`<svg width="100"></svg>`)).IsSvgImage())
    31  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)).IsSvgImage())
    32  	assert.True(t, DetectContentType([]byte(`<!-- Comment -->
    33  	<svg></svg>`)).IsSvgImage())
    34  	assert.True(t, DetectContentType([]byte(`<!-- Multiple -->
    35  	<!-- Comments -->
    36  	<svg></svg>`)).IsSvgImage())
    37  	assert.True(t, DetectContentType([]byte(`<!-- Multiline
    38  	Comment -->
    39  	<svg></svg>`)).IsSvgImage())
    40  	assert.True(t, DetectContentType([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
    41  	"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
    42  	<svg></svg>`)).IsSvgImage())
    43  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    44  	<!-- Comment -->
    45  	<svg></svg>`)).IsSvgImage())
    46  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    47  	<!-- Multiple -->
    48  	<!-- Comments -->
    49  	<svg></svg>`)).IsSvgImage())
    50  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    51  	<!-- Multline
    52  	Comment -->
    53  	<svg></svg>`)).IsSvgImage())
    54  	assert.True(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    55  	<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
    56  	<!-- Multline
    57  	Comment -->
    58  	<svg></svg>`)).IsSvgImage())
    59  
    60  	// the DetectContentType should work for incomplete data, because only beginning bytes are used for detection
    61  	assert.True(t, DetectContentType([]byte(`<svg>....`)).IsSvgImage())
    62  
    63  	assert.False(t, DetectContentType([]byte{}).IsSvgImage())
    64  	assert.False(t, DetectContentType([]byte("svg")).IsSvgImage())
    65  	assert.False(t, DetectContentType([]byte("<svgfoo></svgfoo>")).IsSvgImage())
    66  	assert.False(t, DetectContentType([]byte("text<svg></svg>")).IsSvgImage())
    67  	assert.False(t, DetectContentType([]byte("<html><body><svg></svg></body></html>")).IsSvgImage())
    68  	assert.False(t, DetectContentType([]byte(`<script>"<svg></svg>"</script>`)).IsSvgImage())
    69  	assert.False(t, DetectContentType([]byte(`<!-- <svg></svg> inside comment -->
    70  	<foo></foo>`)).IsSvgImage())
    71  	assert.False(t, DetectContentType([]byte(`<?xml version="1.0" encoding="UTF-8"?>
    72  	<!-- <svg></svg> inside comment -->
    73  	<foo></foo>`)).IsSvgImage())
    74  
    75  	assert.False(t, DetectContentType([]byte(`
    76  <!-- comment1 -->
    77  <div>
    78  	<!-- comment2 -->
    79  	<svg></svg>
    80  </div>
    81  `)).IsSvgImage())
    82  
    83  	assert.False(t, DetectContentType([]byte(`
    84  <!-- comment1
    85  -->
    86  <div>
    87  	<!-- comment2
    88  -->
    89  	<svg></svg>
    90  </div>
    91  `)).IsSvgImage())
    92  	assert.False(t, DetectContentType([]byte(`<html><body><!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd"><svg></svg></body></html>`)).IsSvgImage())
    93  	assert.False(t, DetectContentType([]byte(`<html><body><?xml version="1.0" encoding="UTF-8"?><svg></svg></body></html>`)).IsSvgImage())
    94  }
    95  
    96  func TestIsPDF(t *testing.T) {
    97  	pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
    98  	assert.True(t, DetectContentType(pdf).IsPDF())
    99  	assert.False(t, DetectContentType([]byte("plain text")).IsPDF())
   100  }
   101  
   102  func TestIsVideo(t *testing.T) {
   103  	mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
   104  	assert.True(t, DetectContentType(mp4).IsVideo())
   105  	assert.False(t, DetectContentType([]byte("plain text")).IsVideo())
   106  }
   107  
   108  func TestIsAudio(t *testing.T) {
   109  	mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
   110  	assert.True(t, DetectContentType(mp3).IsAudio())
   111  	assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
   112  
   113  	assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
   114  	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText())          // test ID3 tag for plain text
   115  	assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
   116  }
   117  
   118  func TestDetectContentTypeFromReader(t *testing.T) {
   119  	mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
   120  	st, err := DetectContentTypeFromReader(bytes.NewReader(mp3))
   121  	assert.NoError(t, err)
   122  	assert.True(t, st.IsAudio())
   123  }