// Copyright 2018 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package urlreplacers

import (
	"bytes"
	"io"
	"unicode/utf8"

	"github.com/gohugoio/hugo/transform"
)

// matchState records how far the lexer has got in recognizing one of the
// attribute prefixes while it walks the content rune by rune.
type matchState int

const (
	// matchStateNone: the current word cannot be a prefix; runes are skipped
	// until the next space.
	matchStateNone matchState = iota
	// matchStateWhitespace: a ' ' was just read, so the next rune may start a
	// prefix.
	matchStateWhitespace
	// matchStatePartial: the runes read so far agree with at least one prefix.
	matchStatePartial
	// matchStateFull: a complete prefix, ending in '=', has been read.
	matchStateFull
)

// absurllexer holds the state of a single replacement pass over content.
type absurllexer struct {
	// the source to absurlify
	content []byte
	// the target for the new absurlified content
	w io.Writer

	// path may be set to a "." relative path
	path []byte

	pos   int // input position
	start int // item start position; content[start:pos] has not been written to w yet
	width int // width of last element, in bytes

	// matchers are the quote/match pairs tried right after a prefix was read.
	matchers []absURLMatcher

	ms      matchState
	matches [3]bool // track matches of the 3 prefixes
	idx     int     // last index in matches checked
}

// stateFunc is a state function over the lexer.
// NOTE(review): nothing in the code visible in this file refers to it.
type stateFunc func(*absurllexer) stateFunc

// prefix is how to identify and which func to handle the replacement.
type prefix struct {
	// r is the attribute name including the trailing '=', one rune per element.
	r []rune
	// f is called with the lexer positioned right after r.
	f func(l *absurllexer)
}

// new prefixes can be added below, but note:
// - the matches array above must be expanded.
64 // - the prefix must with the current logic end with '=' 65 var prefixes = []*prefix{ 66 {r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase}, 67 {r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase}, 68 {r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset}, 69 } 70 71 type absURLMatcher struct { 72 match []byte 73 quote []byte 74 } 75 76 // match check rune inside word. Will be != ' '. 77 func (l *absurllexer) match(r rune) { 78 79 var found bool 80 81 // note, the prefixes can start off on the same foot, i.e. 82 // src and srcset. 83 if l.ms == matchStateWhitespace { 84 l.idx = 0 85 for j, p := range prefixes { 86 if r == p.r[l.idx] { 87 l.matches[j] = true 88 found = true 89 // checkMatchState will only return true when r=='=', so 90 // we can safely ignore the return value here. 91 l.checkMatchState(r, j) 92 } 93 } 94 95 if !found { 96 l.ms = matchStateNone 97 } 98 99 return 100 } 101 102 l.idx++ 103 for j, m := range l.matches { 104 // still a match? 105 if m { 106 if prefixes[j].r[l.idx] == r { 107 found = true 108 if l.checkMatchState(r, j) { 109 return 110 } 111 } else { 112 l.matches[j] = false 113 } 114 } 115 } 116 117 if !found { 118 l.ms = matchStateNone 119 } 120 } 121 122 func (l *absurllexer) checkMatchState(r rune, idx int) bool { 123 if r == '=' { 124 l.ms = matchStateFull 125 for k := range l.matches { 126 if k != idx { 127 l.matches[k] = false 128 } 129 } 130 return true 131 } 132 133 l.ms = matchStatePartial 134 135 return false 136 } 137 138 func (l *absurllexer) emit() { 139 l.w.Write(l.content[l.start:l.pos]) 140 l.start = l.pos 141 } 142 143 // handle URLs in src and href. 
144 func checkCandidateBase(l *absurllexer) { 145 for _, m := range l.matchers { 146 if !bytes.HasPrefix(l.content[l.pos:], m.match) { 147 continue 148 } 149 // check for schemaless URLs 150 posAfter := l.pos + len(m.match) 151 if posAfter >= len(l.content) { 152 return 153 } 154 r, _ := utf8.DecodeRune(l.content[posAfter:]) 155 if r == '/' { 156 // schemaless: skip 157 return 158 } 159 if l.pos > l.start { 160 l.emit() 161 } 162 l.pos += len(m.match) 163 l.w.Write(m.quote) 164 l.w.Write(l.path) 165 l.start = l.pos 166 } 167 } 168 169 // handle URLs in srcset. 170 func checkCandidateSrcset(l *absurllexer) { 171 // special case, not frequent (me think) 172 for _, m := range l.matchers { 173 if !bytes.HasPrefix(l.content[l.pos:], m.match) { 174 continue 175 } 176 177 // check for schemaless URLs 178 posAfter := l.pos + len(m.match) 179 if posAfter >= len(l.content) { 180 return 181 } 182 r, _ := utf8.DecodeRune(l.content[posAfter:]) 183 if r == '/' { 184 // schemaless: skip 185 continue 186 } 187 188 posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote) 189 190 // safe guard 191 if posLastQuote < 0 || posLastQuote > 2000 { 192 return 193 } 194 195 if l.pos > l.start { 196 l.emit() 197 } 198 199 section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1] 200 201 fields := bytes.Fields(section) 202 l.w.Write(m.quote) 203 for i, f := range fields { 204 if f[0] == '/' { 205 l.w.Write(l.path) 206 l.w.Write(f[1:]) 207 208 } else { 209 l.w.Write(f) 210 } 211 212 if i < len(fields)-1 { 213 l.w.Write([]byte(" ")) 214 } 215 } 216 217 l.w.Write(m.quote) 218 l.pos += len(section) + (len(m.quote) * 2) 219 l.start = l.pos 220 } 221 } 222 223 // main loop 224 func (l *absurllexer) replace() { 225 contentLength := len(l.content) 226 var r rune 227 228 for { 229 if l.pos >= contentLength { 230 l.width = 0 231 break 232 } 233 234 var width = 1 235 r = rune(l.content[l.pos]) 236 if r >= utf8.RuneSelf { 237 r, width = utf8.DecodeRune(l.content[l.pos:]) 238 } 239 l.width = width 
240 l.pos += l.width 241 if r == ' ' { 242 l.ms = matchStateWhitespace 243 } else if l.ms != matchStateNone { 244 l.match(r) 245 if l.ms == matchStateFull { 246 var p *prefix 247 for i, m := range l.matches { 248 if m { 249 p = prefixes[i] 250 l.matches[i] = false 251 } 252 } 253 l.ms = matchStateNone 254 p.f(l) 255 } 256 } 257 } 258 259 // Done! 260 if l.pos > l.start { 261 l.emit() 262 } 263 } 264 265 func doReplace(path string, ct transform.FromTo, matchers []absURLMatcher) { 266 267 lexer := &absurllexer{ 268 content: ct.From().Bytes(), 269 w: ct.To(), 270 path: []byte(path), 271 matchers: matchers} 272 273 lexer.replace() 274 } 275 276 type absURLReplacer struct { 277 htmlMatchers []absURLMatcher 278 xmlMatchers []absURLMatcher 279 } 280 281 func newAbsURLReplacer() *absURLReplacer { 282 283 // HTML 284 dqHTMLMatch := []byte("\"/") 285 sqHTMLMatch := []byte("'/") 286 287 // XML 288 dqXMLMatch := []byte(""/") 289 sqXMLMatch := []byte("'/") 290 291 dqHTML := []byte("\"") 292 sqHTML := []byte("'") 293 294 dqXML := []byte(""") 295 sqXML := []byte("'") 296 297 return &absURLReplacer{ 298 htmlMatchers: []absURLMatcher{ 299 {dqHTMLMatch, dqHTML}, 300 {sqHTMLMatch, sqHTML}, 301 }, 302 xmlMatchers: []absURLMatcher{ 303 {dqXMLMatch, dqXML}, 304 {sqXMLMatch, sqXML}, 305 }} 306 } 307 308 func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) { 309 doReplace(path, ct, au.htmlMatchers) 310 } 311 312 func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) { 313 doReplace(path, ct, au.xmlMatchers) 314 }