code.gitea.io/gitea@v1.22.3/modules/highlight/highlight.go (about)

     1  // Copyright 2015 The Gogs Authors. All rights reserved.
     2  // Copyright 2020 The Gitea Authors. All rights reserved.
     3  // SPDX-License-Identifier: MIT
     4  
     5  package highlight
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"fmt"
    11  	gohtml "html"
    12  	"html/template"
    13  	"io"
    14  	"path/filepath"
    15  	"strings"
    16  	"sync"
    17  
    18  	"code.gitea.io/gitea/modules/analyze"
    19  	"code.gitea.io/gitea/modules/log"
    20  	"code.gitea.io/gitea/modules/setting"
    21  	"code.gitea.io/gitea/modules/util"
    22  
    23  	"github.com/alecthomas/chroma/v2"
    24  	"github.com/alecthomas/chroma/v2/formatters/html"
    25  	"github.com/alecthomas/chroma/v2/lexers"
    26  	"github.com/alecthomas/chroma/v2/styles"
    27  	lru "github.com/hashicorp/golang-lru/v2"
    28  )
    29  
// sizeLimit is the largest input, in bytes, that Code and File will pass to the
// highlighter; larger inputs are returned HTML-escaped without highlighting,
// for performance purposes.
const sizeLimit = 1024 * 1024

var (
	// For custom user mapping: looked up by file extension (filepath.Ext) and
	// replaced with the configured mapping in NewContext.
	highlightMapping = map[string]string{}

	// once guards the one-time initialisation performed by NewContext.
	once sync.Once

	// cache remembers the lexer selected for a file name in Code; it is created
	// in NewContext and is nil until NewContext has run.
	cache *lru.TwoQueueCache[string, any]

	// githubStyles is the chroma style handed to the HTML formatter.
	githubStyles = styles.Get("github")
)
    43  
    44  // NewContext loads custom highlight map from local config
    45  func NewContext() {
    46  	once.Do(func() {
    47  		highlightMapping = setting.GetHighlightMapping()
    48  
    49  		// The size 512 is simply a conservative rule of thumb
    50  		c, err := lru.New2Q[string, any](512)
    51  		if err != nil {
    52  			panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err))
    53  		}
    54  		cache = c
    55  	})
    56  }
    57  
    58  // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name
    59  func Code(fileName, language, code string) (output template.HTML, lexerName string) {
    60  	NewContext()
    61  
    62  	// diff view newline will be passed as empty, change to literal '\n' so it can be copied
    63  	// preserve literal newline in blame view
    64  	if code == "" || code == "\n" {
    65  		return "\n", ""
    66  	}
    67  
    68  	if len(code) > sizeLimit {
    69  		return template.HTML(template.HTMLEscapeString(code)), ""
    70  	}
    71  
    72  	var lexer chroma.Lexer
    73  
    74  	if len(language) > 0 {
    75  		lexer = lexers.Get(language)
    76  
    77  		if lexer == nil {
    78  			// Attempt stripping off the '?'
    79  			if idx := strings.IndexByte(language, '?'); idx > 0 {
    80  				lexer = lexers.Get(language[:idx])
    81  			}
    82  		}
    83  	}
    84  
    85  	if lexer == nil {
    86  		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
    87  			// use mapped value to find lexer
    88  			lexer = lexers.Get(val)
    89  		}
    90  	}
    91  
    92  	if lexer == nil {
    93  		if l, ok := cache.Get(fileName); ok {
    94  			lexer = l.(chroma.Lexer)
    95  		}
    96  	}
    97  
    98  	if lexer == nil {
    99  		lexer = lexers.Match(fileName)
   100  		if lexer == nil {
   101  			lexer = lexers.Fallback
   102  		}
   103  		cache.Add(fileName, lexer)
   104  	}
   105  
   106  	return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name)
   107  }
   108  
   109  // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes
   110  func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML {
   111  	formatter := html.New(html.WithClasses(true),
   112  		html.WithLineNumbers(false),
   113  		html.PreventSurroundingPre(true),
   114  	)
   115  
   116  	htmlbuf := bytes.Buffer{}
   117  	htmlw := bufio.NewWriter(&htmlbuf)
   118  
   119  	iterator, err := lexer.Tokenise(nil, code)
   120  	if err != nil {
   121  		log.Error("Can't tokenize code: %v", err)
   122  		return template.HTML(template.HTMLEscapeString(code))
   123  	}
   124  	// style not used for live site but need to pass something
   125  	err = formatter.Format(htmlw, githubStyles, iterator)
   126  	if err != nil {
   127  		log.Error("Can't format code: %v", err)
   128  		return template.HTML(template.HTMLEscapeString(code))
   129  	}
   130  
   131  	_ = htmlw.Flush()
   132  	// Chroma will add newlines for certain lexers in order to highlight them properly
   133  	// Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
   134  	return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n"))
   135  }
   136  
   137  // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
   138  func File(fileName, language string, code []byte) ([]template.HTML, string, error) {
   139  	NewContext()
   140  
   141  	if len(code) > sizeLimit {
   142  		return PlainText(code), "", nil
   143  	}
   144  
   145  	formatter := html.New(html.WithClasses(true),
   146  		html.WithLineNumbers(false),
   147  		html.PreventSurroundingPre(true),
   148  	)
   149  
   150  	var lexer chroma.Lexer
   151  
   152  	// provided language overrides everything
   153  	if language != "" {
   154  		lexer = lexers.Get(language)
   155  	}
   156  
   157  	if lexer == nil {
   158  		if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
   159  			lexer = lexers.Get(val)
   160  		}
   161  	}
   162  
   163  	if lexer == nil {
   164  		guessLanguage := analyze.GetCodeLanguage(fileName, code)
   165  
   166  		lexer = lexers.Get(guessLanguage)
   167  		if lexer == nil {
   168  			lexer = lexers.Match(fileName)
   169  			if lexer == nil {
   170  				lexer = lexers.Fallback
   171  			}
   172  		}
   173  	}
   174  
   175  	lexerName := formatLexerName(lexer.Config().Name)
   176  
   177  	iterator, err := lexer.Tokenise(nil, string(code))
   178  	if err != nil {
   179  		return nil, "", fmt.Errorf("can't tokenize code: %w", err)
   180  	}
   181  
   182  	tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens())
   183  	htmlBuf := &bytes.Buffer{}
   184  
   185  	lines := make([]template.HTML, 0, len(tokensLines))
   186  	for _, tokens := range tokensLines {
   187  		iterator = chroma.Literator(tokens...)
   188  		err = formatter.Format(htmlBuf, githubStyles, iterator)
   189  		if err != nil {
   190  			return nil, "", fmt.Errorf("can't format code: %w", err)
   191  		}
   192  		lines = append(lines, template.HTML(htmlBuf.String()))
   193  		htmlBuf.Reset()
   194  	}
   195  
   196  	return lines, lexerName, nil
   197  }
   198  
   199  // PlainText returns non-highlighted HTML for code
   200  func PlainText(code []byte) []template.HTML {
   201  	r := bufio.NewReader(bytes.NewReader(code))
   202  	m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1)
   203  	for {
   204  		content, err := r.ReadString('\n')
   205  		if err != nil && err != io.EOF {
   206  			log.Error("failed to read string from buffer: %v", err)
   207  			break
   208  		}
   209  		if content == "" && err == io.EOF {
   210  			break
   211  		}
   212  		s := template.HTML(gohtml.EscapeString(content))
   213  		m = append(m, s)
   214  	}
   215  	return m
   216  }
   217  
   218  func formatLexerName(name string) string {
   219  	if name == "fallback" {
   220  		return "Plaintext"
   221  	}
   222  
   223  	return util.ToTitleCaseNoLower(name)
   224  }