github.com/shohhei1126/hugo@v0.42.2-0.20180623210752-3d5928889ad7/transform/absurlreplacer.go (about)

     1  // Copyright 2015 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package transform
    15  
    16  import (
    17  	"bytes"
    18  	"io"
    19  	"unicode/utf8"
    20  )
    21  
// matchState describes how far the lexer has come in recognising one of the
// attribute prefixes (see prefixes) in the word currently being scanned.
type matchState int

const (
	// matchStateNone means no match is in progress; replace does not call
	// match again until a space has been seen.
	matchStateNone matchState = iota
	// matchStateWhitespace means the previous rune was a space, so the next
	// rune is treated as the first rune of a new word.
	matchStateWhitespace
	// matchStatePartial means the runes seen so far in this word agree with
	// at least one prefix, but its terminating '=' has not been reached.
	matchStatePartial
	// matchStateFull means a prefix was matched up to and including its '='.
	matchStateFull
)
    30  
// absurllexer scans content once, copying it to w and inserting path in
// front of URLs that follow one of the known attribute prefixes.
type absurllexer struct {
	// the source to absurlify
	content []byte
	// the target for the new absurlified content
	w io.Writer

	// path may be set to a "." relative path
	path []byte

	pos   int // input position (byte offset of the next rune to read)
	start int // item start position (first byte not yet written to w)
	width int // width in bytes of the last rune read; 0 once input is exhausted

	// matchers are the quote+slash sequences that mark a URL to rewrite.
	matchers []absURLMatcher

	ms      matchState
	matches [3]bool // track matches of the 3 prefixes, indexed like prefixes
	idx     int     // index of the rune inside the prefixes compared last

}
    51  
// stateFunc is a function that takes the lexer and returns another stateFunc.
// NOTE(review): nothing in the visible part of this file references this
// type; confirm whether it is still needed.
type stateFunc func(*absurllexer) stateFunc
    53  
// prefix describes one attribute to look for and the func that handles the
// replacement once it has been found.
type prefix struct {
	// r holds the runes of the attribute name, including the trailing '='.
	r []rune
	// f is called with the lexer positioned right after the '='.
	f func(l *absurllexer)
}
    59  
    60  // new prefixes can be added below, but note:
    61  // - the matches array above must be expanded.
    62  // - the prefix must with the current logic end with '='
    63  var prefixes = []*prefix{
    64  	{r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase},
    65  	{r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase},
    66  	{r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset},
    67  }
    68  
// absURLMatcher describes what must follow an attribute prefix for its value
// to be rewritten.
type absURLMatcher struct {
	// match is the byte sequence that must come right after the prefix; the
	// matchers built in this file use an opening quote followed by '/'.
	match []byte
	// quote is written back to the output in front of the inserted path.
	quote []byte
}
    73  
    74  // match check rune inside word. Will be != ' '.
    75  func (l *absurllexer) match(r rune) {
    76  
    77  	var found bool
    78  
    79  	// note, the prefixes can start off on the same foot, i.e.
    80  	// src and srcset.
    81  	if l.ms == matchStateWhitespace {
    82  		l.idx = 0
    83  		for j, p := range prefixes {
    84  			if r == p.r[l.idx] {
    85  				l.matches[j] = true
    86  				found = true
    87  				// checkMatchState will only return true when r=='=', so
    88  				// we can safely ignore the return value here.
    89  				l.checkMatchState(r, j)
    90  			}
    91  		}
    92  
    93  		if !found {
    94  			l.ms = matchStateNone
    95  		}
    96  
    97  		return
    98  	}
    99  
   100  	l.idx++
   101  	for j, m := range l.matches {
   102  		// still a match?
   103  		if m {
   104  			if prefixes[j].r[l.idx] == r {
   105  				found = true
   106  				if l.checkMatchState(r, j) {
   107  					return
   108  				}
   109  			} else {
   110  				l.matches[j] = false
   111  			}
   112  		}
   113  	}
   114  
   115  	if !found {
   116  		l.ms = matchStateNone
   117  	}
   118  }
   119  
   120  func (l *absurllexer) checkMatchState(r rune, idx int) bool {
   121  	if r == '=' {
   122  		l.ms = matchStateFull
   123  		for k := range l.matches {
   124  			if k != idx {
   125  				l.matches[k] = false
   126  			}
   127  		}
   128  		return true
   129  	}
   130  
   131  	l.ms = matchStatePartial
   132  
   133  	return false
   134  }
   135  
   136  func (l *absurllexer) emit() {
   137  	l.w.Write(l.content[l.start:l.pos])
   138  	l.start = l.pos
   139  }
   140  
   141  // handle URLs in src and href.
   142  func checkCandidateBase(l *absurllexer) {
   143  	for _, m := range l.matchers {
   144  		if !bytes.HasPrefix(l.content[l.pos:], m.match) {
   145  			continue
   146  		}
   147  		// check for schemaless URLs
   148  		posAfter := l.pos + len(m.match)
   149  		if posAfter >= len(l.content) {
   150  			return
   151  		}
   152  		r, _ := utf8.DecodeRune(l.content[posAfter:])
   153  		if r == '/' {
   154  			// schemaless: skip
   155  			return
   156  		}
   157  		if l.pos > l.start {
   158  			l.emit()
   159  		}
   160  		l.pos += len(m.match)
   161  		l.w.Write(m.quote)
   162  		l.w.Write(l.path)
   163  		l.start = l.pos
   164  	}
   165  }
   166  
   167  // handle URLs in srcset.
   168  func checkCandidateSrcset(l *absurllexer) {
   169  	// special case, not frequent (me think)
   170  	for _, m := range l.matchers {
   171  		if !bytes.HasPrefix(l.content[l.pos:], m.match) {
   172  			continue
   173  		}
   174  
   175  		// check for schemaless URLs
   176  		posAfter := l.pos + len(m.match)
   177  		if posAfter >= len(l.content) {
   178  			return
   179  		}
   180  		r, _ := utf8.DecodeRune(l.content[posAfter:])
   181  		if r == '/' {
   182  			// schemaless: skip
   183  			continue
   184  		}
   185  
   186  		posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote)
   187  
   188  		// safe guard
   189  		if posLastQuote < 0 || posLastQuote > 2000 {
   190  			return
   191  		}
   192  
   193  		if l.pos > l.start {
   194  			l.emit()
   195  		}
   196  
   197  		section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1]
   198  
   199  		fields := bytes.Fields(section)
   200  		l.w.Write(m.quote)
   201  		for i, f := range fields {
   202  			if f[0] == '/' {
   203  				l.w.Write(l.path)
   204  				l.w.Write(f[1:])
   205  
   206  			} else {
   207  				l.w.Write(f)
   208  			}
   209  
   210  			if i < len(fields)-1 {
   211  				l.w.Write([]byte(" "))
   212  			}
   213  		}
   214  
   215  		l.w.Write(m.quote)
   216  		l.pos += len(section) + (len(m.quote) * 2)
   217  		l.start = l.pos
   218  	}
   219  }
   220  
   221  // main loop
   222  func (l *absurllexer) replace() {
   223  	contentLength := len(l.content)
   224  	var r rune
   225  
   226  	for {
   227  		if l.pos >= contentLength {
   228  			l.width = 0
   229  			break
   230  		}
   231  
   232  		var width = 1
   233  		r = rune(l.content[l.pos])
   234  		if r >= utf8.RuneSelf {
   235  			r, width = utf8.DecodeRune(l.content[l.pos:])
   236  		}
   237  		l.width = width
   238  		l.pos += l.width
   239  		if r == ' ' {
   240  			l.ms = matchStateWhitespace
   241  		} else if l.ms != matchStateNone {
   242  			l.match(r)
   243  			if l.ms == matchStateFull {
   244  				var p *prefix
   245  				for i, m := range l.matches {
   246  					if m {
   247  						p = prefixes[i]
   248  						l.matches[i] = false
   249  					}
   250  				}
   251  				l.ms = matchStateNone
   252  				p.f(l)
   253  			}
   254  		}
   255  	}
   256  
   257  	// Done!
   258  	if l.pos > l.start {
   259  		l.emit()
   260  	}
   261  }
   262  
   263  func doReplace(ct contentTransformer, matchers []absURLMatcher) {
   264  
   265  	lexer := &absurllexer{
   266  		content:  ct.Content(),
   267  		w:        ct,
   268  		path:     ct.Path(),
   269  		matchers: matchers}
   270  
   271  	lexer.replace()
   272  }
   273  
// absURLReplacer holds the matcher sets used by replaceInHTML and
// replaceInXML.
type absURLReplacer struct {
	// htmlMatchers match plain double and single quotes followed by '/'.
	htmlMatchers []absURLMatcher
	// xmlMatchers match the entity-encoded quotes (&#34; and &#39;)
	// followed by '/'.
	xmlMatchers []absURLMatcher
}
   278  
   279  func newAbsURLReplacer() *absURLReplacer {
   280  
   281  	// HTML
   282  	dqHTMLMatch := []byte("\"/")
   283  	sqHTMLMatch := []byte("'/")
   284  
   285  	// XML
   286  	dqXMLMatch := []byte("&#34;/")
   287  	sqXMLMatch := []byte("&#39;/")
   288  
   289  	dqHTML := []byte("\"")
   290  	sqHTML := []byte("'")
   291  
   292  	dqXML := []byte("&#34;")
   293  	sqXML := []byte("&#39;")
   294  
   295  	return &absURLReplacer{
   296  		htmlMatchers: []absURLMatcher{
   297  			{dqHTMLMatch, dqHTML},
   298  			{sqHTMLMatch, sqHTML},
   299  		},
   300  		xmlMatchers: []absURLMatcher{
   301  			{dqXMLMatch, dqXML},
   302  			{sqXMLMatch, sqXML},
   303  		}}
   304  }
   305  
// replaceInHTML rewrites the content of ct using the HTML matchers (plain
// quotes).
func (au *absURLReplacer) replaceInHTML(ct contentTransformer) {
	doReplace(ct, au.htmlMatchers)
}
   309  
// replaceInXML rewrites the content of ct using the XML matchers
// (entity-encoded quotes).
func (au *absURLReplacer) replaceInXML(ct contentTransformer) {
	doReplace(ct, au.xmlMatchers)
}