55package highlight
66
77import (
8- "bufio"
98 "bytes"
10- "fmt"
11- gohtml "html"
129 "html/template"
13- "io"
14- "strings"
10+ "slices"
1511 "sync"
1612
1713 "code.gitea.io/gitea/modules/log"
@@ -23,12 +19,14 @@ import (
2319 "github.com/alecthomas/chroma/v2/styles"
2420)
2521
26- // don't index files larger than this many bytes for performance purposes
22+ // don't highlight files larger than this many bytes for performance purposes
2723const sizeLimit = 1024 * 1024
2824
2925type globalVarsType struct {
3026 highlightMapping map [string ]string
3127 githubStyles * chroma.Style
28+ escapeFull []template.HTML
29+ escCtrlCharsMap []template.HTML
3230}
3331
3432var (
@@ -44,10 +42,69 @@ func globalVars() *globalVarsType {
4442 globalVarsPtr = & globalVarsType {}
4543 globalVarsPtr .githubStyles = styles .Get ("github" )
4644 globalVarsPtr .highlightMapping = setting .GetHighlightMapping ()
45+ globalVarsPtr .escCtrlCharsMap = make ([]template.HTML , 256 )
46+ // ASCII Table 0x00 - 0x1F
47+ controlCharNames := []string {
48+ "NUL" , "SOH" , "STX" , "ETX" , "EOT" , "ENQ" , "ACK" , "BEL" ,
49+ "BS" , "HT" , "LF" , "VT" , "FF" , "CR" , "SO" , "SI" ,
50+ "DLE" , "DC1" , "DC2" , "DC3" , "DC4" , "NAK" , "SYN" , "ETB" ,
51+ "CAN" , "EM" , "SUB" , "ESC" , "FS" , "GS" , "RS" , "US" ,
52+ }
53+ // Uncomment the line below to debug the layout without creating a special file: Space (0x20) will then be escaped as well.
54+ // Don't worry: even if you forget to comment it out again and push it to the git repo, the CI tests will catch it and fail.
55+ // controlCharNames = append(controlCharNames, "SP")
56+ for i , s := range controlCharNames {
57+ globalVarsPtr .escCtrlCharsMap [i ] = template .HTML (`<span class="broken-code-point" data-escaped="` + s + `"><span class="char">` + string (byte (i )) + `</span></span>` )
58+ }
59+ globalVarsPtr .escCtrlCharsMap [0x7f ] = template .HTML (`<span class="broken-code-point" data-escaped="DEL"><span class="char">` + string (byte (0x7f )) + `</span></span>` )
60+ globalVarsPtr .escCtrlCharsMap ['\t' ] = ""
61+ globalVarsPtr .escCtrlCharsMap ['\n' ] = ""
62+ globalVarsPtr .escCtrlCharsMap ['\r' ] = ""
63+
64+ globalVarsPtr .escapeFull = slices .Clone (globalVarsPtr .escCtrlCharsMap )
65+ // exactly the same as Golang's html.EscapeString
66+ globalVarsPtr .escapeFull ['&' ] = "&amp;"
67+ globalVarsPtr .escapeFull ['\'' ] = "&#39;"
68+ globalVarsPtr .escapeFull ['<' ] = "&lt;"
69+ globalVarsPtr .escapeFull ['>' ] = "&gt;"
70+ globalVarsPtr .escapeFull ['"' ] = "&#34;"
4771 }
4872 return globalVarsPtr
4973}
5074
75+ func escapeByMap (code []byte , escapeMap []template.HTML ) template.HTML {
76+ firstEscapePos := - 1
77+ for i , c := range code {
78+ if escapeMap [c ] != "" {
79+ firstEscapePos = i
80+ break
81+ }
82+ }
83+ if firstEscapePos == - 1 {
84+ return template .HTML (util .UnsafeBytesToString (code ))
85+ }
86+
87+ buf := make ([]byte , firstEscapePos , len (code )* 2 )
88+ copy (buf [:firstEscapePos ], code [:firstEscapePos ])
89+ for i := firstEscapePos ; i < len (code ); i ++ {
90+ c := code [i ]
91+ if esc := escapeMap [c ]; esc != "" {
92+ buf = append (buf , esc ... )
93+ } else {
94+ buf = append (buf , c )
95+ }
96+ }
97+ return template .HTML (util .UnsafeBytesToString (buf ))
98+ }
99+
100+ func escapeFullString (code string ) template.HTML {
101+ return escapeByMap (util .UnsafeStringToBytes (code ), globalVars ().escapeFull )
102+ }
103+
104+ func escapeControlChars (code []byte ) template.HTML {
105+ return escapeByMap (code , globalVars ().escCtrlCharsMap )
106+ }
107+
51108// UnsafeSplitHighlightedLines splits highlighted code into lines preserving HTML tags
52109// It always includes '\n', '\n' can appear at the end of each line or in the middle of HTML tags
53110// The '\n' is necessary for copying code from web UI to preserve original code lines
@@ -90,7 +147,7 @@ func RenderCodeSlowGuess(fileName, language, code string) (output template.HTML,
90147 }
91148
92149 if len (code ) > sizeLimit {
93- return template . HTML ( template . HTMLEscapeString ( code ) ), nil , ""
150+ return escapeFullString ( code ), nil , ""
94151 }
95152
96153 lexer = detectChromaLexerWithAnalyze (fileName , language , util .UnsafeStringToBytes (code )) // it is also slow
@@ -104,86 +161,66 @@ func RenderCodeByLexer(lexer chroma.Lexer, code string) template.HTML {
104161 html .PreventSurroundingPre (true ),
105162 )
106163
107- htmlbuf := bytes.Buffer {}
108- htmlw := bufio .NewWriter (& htmlbuf )
109-
110164 iterator , err := lexer .Tokenise (nil , code )
111165 if err != nil {
112166 log .Error ("Can't tokenize code: %v" , err )
113- return template . HTML ( template . HTMLEscapeString ( code ) )
167+ return escapeFullString ( code )
114168 }
169+
170+ htmlBuf := & bytes.Buffer {}
115171 // style not used for live site but need to pass something
116- err = formatter .Format (htmlw , globalVars ().githubStyles , iterator )
172+ err = formatter .Format (htmlBuf , globalVars ().githubStyles , iterator )
117173 if err != nil {
118174 log .Error ("Can't format code: %v" , err )
119- return template . HTML ( template . HTMLEscapeString ( code ) )
175+ return escapeFullString ( code )
120176 }
121177
122- _ = htmlw .Flush ()
123- // Chroma will add newlines for certain lexers in order to highlight them properly
124- // Once highlighted, strip them here, so they don't cause copy/paste trouble in HTML output
125- return template .HTML (strings .TrimSuffix (htmlbuf .String (), "\n " ))
178+ // At the moment, we do not escape control chars here (unlike RenderFullFile, which does escape them).
179+ // The reason: it is very rare for a text file to contain control chars.
180+ // This function is usually used for highlighting diff and blame, and it is unclear whether escaping there would have side effects.
181+ // If users report problems with this, we can reconsider the various edge cases.
182+ return template .HTML (htmlBuf .String ())
126183}
127184
128185// RenderFullFile returns a slice of chroma syntax highlighted HTML lines of code and the matched lexer name
129- func RenderFullFile (fileName , language string , code []byte ) ([]template.HTML , string , error ) {
130- if len (code ) > sizeLimit {
131- return RenderPlainText (code ), "" , nil
186+ func RenderFullFile (fileName , language string , code []byte ) ([]template.HTML , string ) {
187+ if language == LanguagePlaintext || len (code ) > sizeLimit {
188+ return renderPlainText (code ), formatLexerName ( LanguagePlaintext )
132189 }
133-
134- formatter := html .New (html .WithClasses (true ),
135- html .WithLineNumbers (false ),
136- html .PreventSurroundingPre (true ),
137- )
138-
139190 lexer := detectChromaLexerWithAnalyze (fileName , language , code )
140191 lexerName := formatLexerName (lexer .Config ().Name )
141-
142- iterator , err := lexer .Tokenise (nil , string (code ))
143- if err != nil {
144- return nil , "" , fmt .Errorf ("can't tokenize code: %w" , err )
192+ rendered := RenderCodeByLexer (lexer , util .UnsafeBytesToString (code ))
193+ unsafeLines := UnsafeSplitHighlightedLines (rendered )
194+ lines := make ([]template.HTML , 0 , len (unsafeLines ))
195+ for _ , lineBytes := range unsafeLines {
196+ line := escapeControlChars (lineBytes )
197+ lines = append (lines , line )
145198 }
146-
147- tokensLines := chroma .SplitTokensIntoLines (iterator .Tokens ())
148- htmlBuf := & bytes.Buffer {}
149-
150- lines := make ([]template.HTML , 0 , len (tokensLines ))
151- for _ , tokens := range tokensLines {
152- iterator = chroma .Literator (tokens ... )
153- err = formatter .Format (htmlBuf , globalVars ().githubStyles , iterator )
154- if err != nil {
155- return nil , "" , fmt .Errorf ("can't format code: %w" , err )
156- }
157- lines = append (lines , template .HTML (htmlBuf .String ()))
158- htmlBuf .Reset ()
159- }
160-
161- return lines , lexerName , nil
199+ return lines , lexerName
162200}
163201
164- // RenderPlainText returns non-highlighted HTML for code
165- func RenderPlainText (code []byte ) []template.HTML {
166- r := bufio .NewReader (bytes .NewReader (code ))
167- m := make ([]template.HTML , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
168- for {
169- content , err := r .ReadString ('\n' )
170- if err != nil && err != io .EOF {
171- log .Error ("failed to read string from buffer: %v" , err )
172- break
173- }
174- if content == "" && err == io .EOF {
175- break
202+ // renderPlainText returns non-highlighted HTML for code
203+ func renderPlainText (code []byte ) []template.HTML {
204+ lines := make ([]template.HTML , 0 , bytes .Count (code , []byte {'\n' })+ 1 )
205+ pos := 0
206+ for pos < len (code ) {
207+ var content []byte
208+ nextPos := bytes .IndexByte (code [pos :], '\n' )
209+ if nextPos == - 1 {
210+ content = code [pos :]
211+ pos = len (code )
212+ } else {
213+ content = code [pos : pos + nextPos + 1 ]
214+ pos += nextPos + 1
176215 }
177- s := template .HTML (gohtml .EscapeString (content ))
178- m = append (m , s )
216+ lines = append (lines , escapeFullString (util .UnsafeBytesToString (content )))
179217 }
180- return m
218+ return lines
181219}
182220
183221func formatLexerName (name string ) string {
184- if name == "fallback" {
222+ if name == LanguagePlaintext || name == chromaLexerFallback {
185223 return "Plaintext"
186224 }
187-
188225 return util .ToTitleCaseNoLower (name )
189226}
0 commit comments