// github.com/shohhei1126/hugo@v0.42.2-0.20180623210752-3d5928889ad7/transform/absurlreplacer.go (about)

// Copyright 2015 The Hugo Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package transform

import (
	"bytes"
	"io"
	"unicode/utf8"
)

type matchState int

const (
	matchStateNone matchState = iota
	matchStateWhitespace
	matchStatePartial
	matchStateFull
)

type absurllexer struct {
	// the source to absurlify
	content []byte
	// the target for the new absurlified content
	w io.Writer

	// path may be set to a "." relative path
	path []byte

	pos   int // input position
	start int // item start position
	width int // width of last element

	matchers []absURLMatcher

	ms      matchState
	matches [3]bool // track matches of the 3 prefixes
	idx     int     // last index in matches checked
}

type stateFunc func(*absurllexer) stateFunc

// prefix describes how to identify an attribute prefix and which func
// handles the replacement once it has been found.
type prefix struct {
	r []rune
	f func(l *absurllexer)
}

// new prefixes can be added below, but note:
// - the matches array above must be expanded.
62 // - the prefix must with the current logic end with '=' 63 var prefixes = []*prefix{ 64 {r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase}, 65 {r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase}, 66 {r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset}, 67 } 68 69 type absURLMatcher struct { 70 match []byte 71 quote []byte 72 } 73 74 // match check rune inside word. Will be != ' '. 75 func (l *absurllexer) match(r rune) { 76 77 var found bool 78 79 // note, the prefixes can start off on the same foot, i.e. 80 // src and srcset. 81 if l.ms == matchStateWhitespace { 82 l.idx = 0 83 for j, p := range prefixes { 84 if r == p.r[l.idx] { 85 l.matches[j] = true 86 found = true 87 // checkMatchState will only return true when r=='=', so 88 // we can safely ignore the return value here. 89 l.checkMatchState(r, j) 90 } 91 } 92 93 if !found { 94 l.ms = matchStateNone 95 } 96 97 return 98 } 99 100 l.idx++ 101 for j, m := range l.matches { 102 // still a match? 103 if m { 104 if prefixes[j].r[l.idx] == r { 105 found = true 106 if l.checkMatchState(r, j) { 107 return 108 } 109 } else { 110 l.matches[j] = false 111 } 112 } 113 } 114 115 if !found { 116 l.ms = matchStateNone 117 } 118 } 119 120 func (l *absurllexer) checkMatchState(r rune, idx int) bool { 121 if r == '=' { 122 l.ms = matchStateFull 123 for k := range l.matches { 124 if k != idx { 125 l.matches[k] = false 126 } 127 } 128 return true 129 } 130 131 l.ms = matchStatePartial 132 133 return false 134 } 135 136 func (l *absurllexer) emit() { 137 l.w.Write(l.content[l.start:l.pos]) 138 l.start = l.pos 139 } 140 141 // handle URLs in src and href. 
142 func checkCandidateBase(l *absurllexer) { 143 for _, m := range l.matchers { 144 if !bytes.HasPrefix(l.content[l.pos:], m.match) { 145 continue 146 } 147 // check for schemaless URLs 148 posAfter := l.pos + len(m.match) 149 if posAfter >= len(l.content) { 150 return 151 } 152 r, _ := utf8.DecodeRune(l.content[posAfter:]) 153 if r == '/' { 154 // schemaless: skip 155 return 156 } 157 if l.pos > l.start { 158 l.emit() 159 } 160 l.pos += len(m.match) 161 l.w.Write(m.quote) 162 l.w.Write(l.path) 163 l.start = l.pos 164 } 165 } 166 167 // handle URLs in srcset. 168 func checkCandidateSrcset(l *absurllexer) { 169 // special case, not frequent (me think) 170 for _, m := range l.matchers { 171 if !bytes.HasPrefix(l.content[l.pos:], m.match) { 172 continue 173 } 174 175 // check for schemaless URLs 176 posAfter := l.pos + len(m.match) 177 if posAfter >= len(l.content) { 178 return 179 } 180 r, _ := utf8.DecodeRune(l.content[posAfter:]) 181 if r == '/' { 182 // schemaless: skip 183 continue 184 } 185 186 posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote) 187 188 // safe guard 189 if posLastQuote < 0 || posLastQuote > 2000 { 190 return 191 } 192 193 if l.pos > l.start { 194 l.emit() 195 } 196 197 section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1] 198 199 fields := bytes.Fields(section) 200 l.w.Write(m.quote) 201 for i, f := range fields { 202 if f[0] == '/' { 203 l.w.Write(l.path) 204 l.w.Write(f[1:]) 205 206 } else { 207 l.w.Write(f) 208 } 209 210 if i < len(fields)-1 { 211 l.w.Write([]byte(" ")) 212 } 213 } 214 215 l.w.Write(m.quote) 216 l.pos += len(section) + (len(m.quote) * 2) 217 l.start = l.pos 218 } 219 } 220 221 // main loop 222 func (l *absurllexer) replace() { 223 contentLength := len(l.content) 224 var r rune 225 226 for { 227 if l.pos >= contentLength { 228 l.width = 0 229 break 230 } 231 232 var width = 1 233 r = rune(l.content[l.pos]) 234 if r >= utf8.RuneSelf { 235 r, width = utf8.DecodeRune(l.content[l.pos:]) 236 } 237 l.width = width 
238 l.pos += l.width 239 if r == ' ' { 240 l.ms = matchStateWhitespace 241 } else if l.ms != matchStateNone { 242 l.match(r) 243 if l.ms == matchStateFull { 244 var p *prefix 245 for i, m := range l.matches { 246 if m { 247 p = prefixes[i] 248 l.matches[i] = false 249 } 250 } 251 l.ms = matchStateNone 252 p.f(l) 253 } 254 } 255 } 256 257 // Done! 258 if l.pos > l.start { 259 l.emit() 260 } 261 } 262 263 func doReplace(ct contentTransformer, matchers []absURLMatcher) { 264 265 lexer := &absurllexer{ 266 content: ct.Content(), 267 w: ct, 268 path: ct.Path(), 269 matchers: matchers} 270 271 lexer.replace() 272 } 273 274 type absURLReplacer struct { 275 htmlMatchers []absURLMatcher 276 xmlMatchers []absURLMatcher 277 } 278 279 func newAbsURLReplacer() *absURLReplacer { 280 281 // HTML 282 dqHTMLMatch := []byte("\"/") 283 sqHTMLMatch := []byte("'/") 284 285 // XML 286 dqXMLMatch := []byte(""/") 287 sqXMLMatch := []byte("'/") 288 289 dqHTML := []byte("\"") 290 sqHTML := []byte("'") 291 292 dqXML := []byte(""") 293 sqXML := []byte("'") 294 295 return &absURLReplacer{ 296 htmlMatchers: []absURLMatcher{ 297 {dqHTMLMatch, dqHTML}, 298 {sqHTMLMatch, sqHTML}, 299 }, 300 xmlMatchers: []absURLMatcher{ 301 {dqXMLMatch, dqXML}, 302 {sqXMLMatch, sqXML}, 303 }} 304 } 305 306 func (au *absURLReplacer) replaceInHTML(ct contentTransformer) { 307 doReplace(ct, au.htmlMatchers) 308 } 309 310 func (au *absURLReplacer) replaceInXML(ct contentTransformer) { 311 doReplace(ct, au.xmlMatchers) 312 }