code.gitea.io/gitea@v1.19.3/modules/highlight/highlight.go (about)

     1  // Copyright 2015 The Gogs Authors. All rights reserved.
     2  // Copyright 2020 The Gitea Authors. All rights reserved.
     3  // SPDX-License-Identifier: MIT
     4  
     5  package highlight
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"fmt"
    11  	gohtml "html"
    12  	"io"
    13  	"path/filepath"
    14  	"strings"
    15  	"sync"
    16  
    17  	"code.gitea.io/gitea/modules/analyze"
    18  	"code.gitea.io/gitea/modules/log"
    19  	"code.gitea.io/gitea/modules/setting"
    20  	"code.gitea.io/gitea/modules/util"
    21  
    22  	"github.com/alecthomas/chroma/v2"
    23  	"github.com/alecthomas/chroma/v2/formatters/html"
    24  	"github.com/alecthomas/chroma/v2/lexers"
    25  	"github.com/alecthomas/chroma/v2/styles"
    26  	lru "github.com/hashicorp/golang-lru"
    27  )
    28  
    29  // don't index files larger than this many bytes for performance purposes
    30  const sizeLimit = 1024 * 1024
    31  
    32  var (
    33  	// For custom user mapping
    34  	highlightMapping = map[string]string{}
    35  
    36  	once sync.Once
    37  
    38  	cache *lru.TwoQueueCache
    39  )
    40  
    41  // NewContext loads custom highlight map from local config
    42  func NewContext() {
    43  	once.Do(func() {
    44  		highlightMapping = setting.GetHighlightMapping()
    45  
    46  		// The size 512 is simply a conservative rule of thumb
    47  		c, err := lru.New2Q(512)
    48  		if err != nil {
    49  			panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
    50  		}
    51  		cache = c
    52  	})
    53  }
    54  
    55  // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
    56  func Code(fileName, language, code string) (string, string) {
    57  	NewContext()
    58  
    59  	// diff view newline will be passed as empty, change to literal '\n' so it can be copied
    60  	// preserve literal newline in blame view
    61  	if code == "" || code == "\n" {
    62  		return "\n", ""
    63  	}
    64  
    65  	if len(code) > sizeLimit {
    66  		return code, ""
    67  	}
    68  
    69  	var lexer chroma.Lexer
    70  
    71  	if len(language) > 0 {
    72  		lexer = lexers.Get(language)
    73  
    74  		if lexer == nil {
    75  			// Attempt stripping off the '?'
    76  			if idx := strings.IndexByte(language, '?'); idx > 0 {
    77  				lexer = lexers.Get(language[:idx])
    78  			}
    79  		}
    80  	}
    81  
    82  	if lexer == nil {
    83  		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
    84  			// use mapped value to find lexer
    85  			lexer = lexers.Get(val)
    86  		}
    87  	}
    88  
    89  	if lexer == nil {
    90  		if l, ok := cache.Get(fileName); ok {
    91  			lexer = l.(chroma.Lexer)
    92  		}
    93  	}
    94  
    95  	if lexer == nil {
    96  		lexer = lexers.Match(fileName)
    97  		if lexer == nil {
    98  			lexer = lexers.Fallback
    99  		}
   100  		cache.Add(fileName, lexer)
   101  	}
   102  
   103  	lexerName := formatLexerName(lexer.Config().Name)
   104  
   105  	return CodeFromLexer(lexer, code), lexerName
   106  }
   107  
   108  // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
   109  func CodeFromLexer(lexer chroma.Lexer, code string) string {
   110  	formatter := html.New(html.WithClasses(true),
   111  		html.WithLineNumbers(false),
   112  		html.PreventSurroundingPre(true),
   113  	)
   114  
   115  	htmlbuf := bytes.Buffer{}
   116  	htmlw := bufio.NewWriter(&htmlbuf)
   117  
   118  	iterator, err := lexer.Tokenise(nil, code)
   119  	if err != nil {
   120  		log.Error("Can't tokenize code: %v", err)
   121  		return code
   122  	}
   123  	// style not used for live site but need to pass something
   124  	err = formatter.Format(htmlw, styles.GitHub, iterator)
   125  	if err != nil {
   126  		log.Error("Can't format code: %v", err)
   127  		return code
   128  	}
   129  
   130  	_ = htmlw.Flush()
   131  	// Chroma will add newlines for certain lexers in order to highlight them properly
   132  	// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
   133  	return strings.TrimSuffix(htmlbuf.String(), "\n")
   134  }
   135  
   136  // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
   137  func File(fileName, language string, code []byte) ([]string, string, error) {
   138  	NewContext()
   139  
   140  	if len(code) > sizeLimit {
   141  		return PlainText(code), "", nil
   142  	}
   143  
   144  	formatter := html.New(html.WithClasses(true),
   145  		html.WithLineNumbers(false),
   146  		html.PreventSurroundingPre(true),
   147  	)
   148  
   149  	var lexer chroma.Lexer
   150  
   151  	// provided language overrides everything
   152  	if language != "" {
   153  		lexer = lexers.Get(language)
   154  	}
   155  
   156  	if lexer == nil {
   157  		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
   158  			lexer = lexers.Get(val)
   159  		}
   160  	}
   161  
   162  	if lexer == nil {
   163  		guessLanguage := analyze.GetCodeLanguage(fileName, code)
   164  
   165  		lexer = lexers.Get(guessLanguage)
   166  		if lexer == nil {
   167  			lexer = lexers.Match(fileName)
   168  			if lexer == nil {
   169  				lexer = lexers.Fallback
   170  			}
   171  		}
   172  	}
   173  
   174  	lexerName := formatLexerName(lexer.Config().Name)
   175  
   176  	iterator, err := lexer.Tokenise(nil, string(code))
   177  	if err != nil {
   178  		return nil, "", fmt.Errorf("can't tokenize code: %w", err)
   179  	}
   180  
   181  	tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
   182  	htmlBuf := &bytes.Buffer{}
   183  
   184  	lines := make([]string, 0, len(tokensLines))
   185  	for _, tokens := range tokensLines {
   186  		iterator = chroma.Literator(tokens...)
   187  		err = formatter.Format(htmlBuf, styles.GitHub, iterator)
   188  		if err != nil {
   189  			return nil, "", fmt.Errorf("can't format code: %w", err)
   190  		}
   191  		lines = append(lines, htmlBuf.String())
   192  		htmlBuf.Reset()
   193  	}
   194  
   195  	return lines, lexerName, nil
   196  }
   197  
   198  // PlainText returns non-highlighted HTML for code
   199  func PlainText(code []byte) []string {
   200  	r := bufio.NewReader(bytes.NewReader(code))
   201  	m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1)
   202  	for {
   203  		content, err := r.ReadString('\n')
   204  		if err != nil && err != io.EOF {
   205  			log.Error("failed to read string from buffer: %v", err)
   206  			break
   207  		}
   208  		if content == "" && err == io.EOF {
   209  			break
   210  		}
   211  		s := gohtml.EscapeString(content)
   212  		m = append(m, s)
   213  	}
   214  	return m
   215  }
   216  
   217  func formatLexerName(name string) string {
   218  	if name == "fallback" {
   219  		return "Plaintext"
   220  	}
   221  
   222  	return util.ToTitleCaseNoLower(name)
   223  }