code.gitea.io/gitea@v1.22.3/modules/highlight/highlight.go (about) 1 // Copyright 2015 The Gogs Authors. All rights reserved. 2 // Copyright 2020 The Gitea Authors. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 5 package highlight 6 7 import ( 8 "bufio" 9 "bytes" 10 "fmt" 11 gohtml "html" 12 "html/template" 13 "io" 14 "path/filepath" 15 "strings" 16 "sync" 17 18 "code.gitea.io/gitea/modules/analyze" 19 "code.gitea.io/gitea/modules/log" 20 "code.gitea.io/gitea/modules/setting" 21 "code.gitea.io/gitea/modules/util" 22 23 "github.com/alecthomas/chroma/v2" 24 "github.com/alecthomas/chroma/v2/formatters/html" 25 "github.com/alecthomas/chroma/v2/lexers" 26 "github.com/alecthomas/chroma/v2/styles" 27 lru "github.com/hashicorp/golang-lru/v2" 28 ) 29 30 // don't index files larger than this many bytes for performance purposes 31 const sizeLimit = 1024 * 1024 32 33 var ( 34 // For custom user mapping 35 highlightMapping = map[string]string{} 36 37 once sync.Once 38 39 cache *lru.TwoQueueCache[string, any] 40 41 githubStyles = styles.Get("github") 42 ) 43 44 // NewContext loads custom highlight map from local config 45 func NewContext() { 46 once.Do(func() { 47 highlightMapping = setting.GetHighlightMapping() 48 49 // The size 512 is simply a conservative rule of thumb 50 c, err := lru.New2Q[string, any](512) 51 if err != nil { 52 panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err)) 53 } 54 cache = c 55 }) 56 } 57 58 // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name 59 func Code(fileName, language, code string) (output template.HTML, lexerName string) { 60 NewContext() 61 62 // diff view newline will be passed as empty, change to literal '\n' so it can be copied 63 // preserve literal newline in blame view 64 if code == "" || code == "\n" { 65 return "\n", "" 66 } 67 68 if len(code) > sizeLimit { 69 return template.HTML(template.HTMLEscapeString(code)), "" 70 } 71 72 var lexer chroma.Lexer 73 74 if len(language) > 0 { 75 lexer = lexers.Get(language) 76 77 if lexer == nil { 78 // Attempt stripping off the '?' 79 if idx := strings.IndexByte(language, '?'); idx > 0 { 80 lexer = lexers.Get(language[:idx]) 81 } 82 } 83 } 84 85 if lexer == nil { 86 if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { 87 // use mapped value to find lexer 88 lexer = lexers.Get(val) 89 } 90 } 91 92 if lexer == nil { 93 if l, ok := cache.Get(fileName); ok { 94 lexer = l.(chroma.Lexer) 95 } 96 } 97 98 if lexer == nil { 99 lexer = lexers.Match(fileName) 100 if lexer == nil { 101 lexer = lexers.Fallback 102 } 103 cache.Add(fileName, lexer) 104 } 105 106 return CodeFromLexer(lexer, code), formatLexerName(lexer.Config().Name) 107 } 108 109 // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes 110 func CodeFromLexer(lexer chroma.Lexer, code string) template.HTML { 111 formatter := html.New(html.WithClasses(true), 112 html.WithLineNumbers(false), 113 html.PreventSurroundingPre(true), 114 ) 115 116 htmlbuf := bytes.Buffer{} 117 htmlw := bufio.NewWriter(&htmlbuf) 118 119 iterator, err := lexer.Tokenise(nil, code) 120 if err != nil { 121 log.Error("Can't tokenize code: %v", err) 122 return template.HTML(template.HTMLEscapeString(code)) 123 } 124 // style not used for live site but need to pass something 125 err = formatter.Format(htmlw, githubStyles, iterator) 126 if err != nil { 127 log.Error("Can't format code: %v", err) 128 return template.HTML(template.HTMLEscapeString(code)) 129 } 130 131 _ = htmlw.Flush() 132 // Chroma will add newlines for certain lexers in order to highlight them properly 133 // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output 134 return template.HTML(strings.TrimSuffix(htmlbuf.String(), "\n")) 135 } 136 137 // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name 138 func File(fileName, language string, code []byte) ([]template.HTML, string, error) { 139 NewContext() 140 141 if len(code) > sizeLimit { 142 return PlainText(code), "", nil 143 } 144 145 formatter := html.New(html.WithClasses(true), 146 html.WithLineNumbers(false), 147 html.PreventSurroundingPre(true), 148 ) 149 150 var lexer chroma.Lexer 151 152 // provided language overrides everything 153 if language != "" { 154 lexer = lexers.Get(language) 155 } 156 157 if lexer == nil { 158 if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { 159 lexer = lexers.Get(val) 160 } 161 } 162 163 if lexer == nil { 164 guessLanguage := analyze.GetCodeLanguage(fileName, code) 165 166 lexer = lexers.Get(guessLanguage) 167 if lexer == nil { 168 lexer = lexers.Match(fileName) 169 if lexer == nil { 170 lexer = lexers.Fallback 171 } 172 } 173 } 174 175 lexerName := formatLexerName(lexer.Config().Name) 176 177 iterator, err := lexer.Tokenise(nil, string(code)) 178 if err != nil { 179 return nil, "", fmt.Errorf("can't tokenize code: %w", err) 180 } 181 182 tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens()) 183 htmlBuf := &bytes.Buffer{} 184 185 lines := make([]template.HTML, 0, len(tokensLines)) 186 for _, tokens := range tokensLines { 187 iterator = chroma.Literator(tokens...) 188 err = formatter.Format(htmlBuf, githubStyles, iterator) 189 if err != nil { 190 return nil, "", fmt.Errorf("can't format code: %w", err) 191 } 192 lines = append(lines, template.HTML(htmlBuf.String())) 193 htmlBuf.Reset() 194 } 195 196 return lines, lexerName, nil 197 } 198 199 // PlainText returns non-highlighted HTML for code 200 func PlainText(code []byte) []template.HTML { 201 r := bufio.NewReader(bytes.NewReader(code)) 202 m := make([]template.HTML, 0, bytes.Count(code, []byte{'\n'})+1) 203 for { 204 content, err := r.ReadString('\n') 205 if err != nil && err != io.EOF { 206 log.Error("failed to read string from buffer: %v", err) 207 break 208 } 209 if content == "" && err == io.EOF { 210 break 211 } 212 s := template.HTML(gohtml.EscapeString(content)) 213 m = append(m, s) 214 } 215 return m 216 } 217 218 func formatLexerName(name string) string { 219 if name == "fallback" { 220 return "Plaintext" 221 } 222 223 return util.ToTitleCaseNoLower(name) 224 }