code.gitea.io/gitea@v1.19.3/modules/highlight/highlight.go (about) 1 // Copyright 2015 The Gogs Authors. All rights reserved. 2 // Copyright 2020 The Gitea Authors. All rights reserved. 3 // SPDX-License-Identifier: MIT 4 5 package highlight 6 7 import ( 8 "bufio" 9 "bytes" 10 "fmt" 11 gohtml "html" 12 "io" 13 "path/filepath" 14 "strings" 15 "sync" 16 17 "code.gitea.io/gitea/modules/analyze" 18 "code.gitea.io/gitea/modules/log" 19 "code.gitea.io/gitea/modules/setting" 20 "code.gitea.io/gitea/modules/util" 21 22 "github.com/alecthomas/chroma/v2" 23 "github.com/alecthomas/chroma/v2/formatters/html" 24 "github.com/alecthomas/chroma/v2/lexers" 25 "github.com/alecthomas/chroma/v2/styles" 26 lru "github.com/hashicorp/golang-lru" 27 ) 28 29 // don't index files larger than this many bytes for performance purposes 30 const sizeLimit = 1024 * 1024 31 32 var ( 33 // For custom user mapping 34 highlightMapping = map[string]string{} 35 36 once sync.Once 37 38 cache *lru.TwoQueueCache 39 ) 40 41 // NewContext loads custom highlight map from local config 42 func NewContext() { 43 once.Do(func() { 44 highlightMapping = setting.GetHighlightMapping() 45 46 // The size 512 is simply a conservative rule of thumb 47 c, err := lru.New2Q(512) 48 if err != nil { 49 panic(fmt.Sprintf("failed to initialize LRU cache for highlighter: %s", err)) 50 } 51 cache = c 52 }) 53 } 54 55 // Code returns a HTML version of code string with chroma syntax highlighting classes and the matched lexer name 56 func Code(fileName, language, code string) (string, string) { 57 NewContext() 58 59 // diff view newline will be passed as empty, change to literal '\n' so it can be copied 60 // preserve literal newline in blame view 61 if code == "" || code == "\n" { 62 return "\n", "" 63 } 64 65 if len(code) > sizeLimit { 66 return code, "" 67 } 68 69 var lexer chroma.Lexer 70 71 if len(language) > 0 { 72 lexer = lexers.Get(language) 73 74 if lexer == nil { 75 // Attempt stripping off the '?' 76 if idx := strings.IndexByte(language, '?'); idx > 0 { 77 lexer = lexers.Get(language[:idx]) 78 } 79 } 80 } 81 82 if lexer == nil { 83 if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { 84 // use mapped value to find lexer 85 lexer = lexers.Get(val) 86 } 87 } 88 89 if lexer == nil { 90 if l, ok := cache.Get(fileName); ok { 91 lexer = l.(chroma.Lexer) 92 } 93 } 94 95 if lexer == nil { 96 lexer = lexers.Match(fileName) 97 if lexer == nil { 98 lexer = lexers.Fallback 99 } 100 cache.Add(fileName, lexer) 101 } 102 103 lexerName := formatLexerName(lexer.Config().Name) 104 105 return CodeFromLexer(lexer, code), lexerName 106 } 107 108 // CodeFromLexer returns a HTML version of code string with chroma syntax highlighting classes 109 func CodeFromLexer(lexer chroma.Lexer, code string) string { 110 formatter := html.New(html.WithClasses(true), 111 html.WithLineNumbers(false), 112 html.PreventSurroundingPre(true), 113 ) 114 115 htmlbuf := bytes.Buffer{} 116 htmlw := bufio.NewWriter(&htmlbuf) 117 118 iterator, err := lexer.Tokenise(nil, code) 119 if err != nil { 120 log.Error("Can't tokenize code: %v", err) 121 return code 122 } 123 // style not used for live site but need to pass something 124 err = formatter.Format(htmlw, styles.GitHub, iterator) 125 if err != nil { 126 log.Error("Can't format code: %v", err) 127 return code 128 } 129 130 _ = htmlw.Flush() 131 // Chroma will add newlines for certain lexers in order to highlight them properly 132 // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output 133 return strings.TrimSuffix(htmlbuf.String(), "\n") 134 } 135 136 // File returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name 137 func File(fileName, language string, code []byte) ([]string, string, error) { 138 NewContext() 139 140 if len(code) > sizeLimit { 141 return PlainText(code), "", nil 142 } 143 144 formatter := html.New(html.WithClasses(true), 145 html.WithLineNumbers(false), 146 html.PreventSurroundingPre(true), 147 ) 148 149 var lexer chroma.Lexer 150 151 // provided language overrides everything 152 if language != "" { 153 lexer = lexers.Get(language) 154 } 155 156 if lexer == nil { 157 if val, ok := highlightMapping[filepath.Ext(fileName)]; ok { 158 lexer = lexers.Get(val) 159 } 160 } 161 162 if lexer == nil { 163 guessLanguage := analyze.GetCodeLanguage(fileName, code) 164 165 lexer = lexers.Get(guessLanguage) 166 if lexer == nil { 167 lexer = lexers.Match(fileName) 168 if lexer == nil { 169 lexer = lexers.Fallback 170 } 171 } 172 } 173 174 lexerName := formatLexerName(lexer.Config().Name) 175 176 iterator, err := lexer.Tokenise(nil, string(code)) 177 if err != nil { 178 return nil, "", fmt.Errorf("can't tokenize code: %w", err) 179 } 180 181 tokensLines := chroma.SplitTokensIntoLines(iterator.Tokens()) 182 htmlBuf := &bytes.Buffer{} 183 184 lines := make([]string, 0, len(tokensLines)) 185 for _, tokens := range tokensLines { 186 iterator = chroma.Literator(tokens...) 187 err = formatter.Format(htmlBuf, styles.GitHub, iterator) 188 if err != nil { 189 return nil, "", fmt.Errorf("can't format code: %w", err) 190 } 191 lines = append(lines, htmlBuf.String()) 192 htmlBuf.Reset() 193 } 194 195 return lines, lexerName, nil 196 } 197 198 // PlainText returns non-highlighted HTML for code 199 func PlainText(code []byte) []string { 200 r := bufio.NewReader(bytes.NewReader(code)) 201 m := make([]string, 0, bytes.Count(code, []byte{'\n'})+1) 202 for { 203 content, err := r.ReadString('\n') 204 if err != nil && err != io.EOF { 205 log.Error("failed to read string from buffer: %v", err) 206 break 207 } 208 if content == "" && err == io.EOF { 209 break 210 } 211 s := gohtml.EscapeString(content) 212 m = append(m, s) 213 } 214 return m 215 } 216 217 func formatLexerName(name string) string { 218 if name == "fallback" { 219 return "Plaintext" 220 } 221 222 return util.ToTitleCaseNoLower(name) 223 }