github.com/sunshinekia/hugo@v0.47.1/transform/urlreplacers/absurlreplacer.go (about)

     1  // Copyright 2018 The Hugo Authors. All rights reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package urlreplacers
    15  
    16  import (
    17  	"bytes"
    18  	"io"
    19  	"unicode/utf8"
    20  
    21  	"github.com/gohugoio/hugo/transform"
    22  )
    23  
// matchState tracks how far the lexer has progressed towards recognizing
// one of the attribute prefixes.
type matchState int

const (
	// matchStateNone: not inside a candidate word; replace ignores runes
	// until the next ' ' is read.
	matchStateNone matchState = iota
	// matchStateWhitespace: a ' ' was just read, so the next rune may start
	// a prefix.
	matchStateWhitespace
	// matchStatePartial: the runes read so far agree with at least one prefix.
	matchStatePartial
	// matchStateFull: a complete prefix, including its trailing '=', was read.
	matchStateFull
)
    32  
// absurllexer scans content once and copies it to w, writing path in place
// of the leading '/' of the URLs accepted by the prefix handlers.
type absurllexer struct {
	// the source to absurlify
	content []byte
	// the target for the new absurlified content
	w io.Writer

	// path may be set to a "." relative path
	path []byte

	pos   int // input position
	start int // item start position
	width int // width of last element

	// matchers holds the quote/slash sequences a handler looks for right
	// after a matched prefix.
	matchers []absURLMatcher

	// ms is the current state of the prefix matching.
	ms      matchState
	matches [3]bool // track matches of the 3 prefixes
	idx     int     // last index in matches checked

}
    53  
// stateFunc is a function that takes the lexer and returns another stateFunc.
// NOTE(review): nothing visible in this file references it — confirm it is
// unused package-wide before removing.
type stateFunc func(*absurllexer) stateFunc
    55  
// prefix describes one attribute prefix to look for and the function that
// handles the candidate once the prefix has been fully matched.
type prefix struct {
	// r is the prefix as runes, e.g. 's', 'r', 'c', '='.
	r []rune
	// f is called by replace once r has been read in full; at that point
	// l.pos is just past the trailing '='.
	f func(l *absurllexer)
}
    61  
    62  // new prefixes can be added below, but note:
    63  // - the matches array above must be expanded.
    64  // - the prefix must with the current logic end with '='
    65  var prefixes = []*prefix{
    66  	{r: []rune{'s', 'r', 'c', '='}, f: checkCandidateBase},
    67  	{r: []rune{'h', 'r', 'e', 'f', '='}, f: checkCandidateBase},
    68  	{r: []rune{'s', 'r', 'c', 's', 'e', 't', '='}, f: checkCandidateSrcset},
    69  }
    70  
// absURLMatcher pairs the byte sequence that opens a root-relative URL
// (a quote followed by '/') with the quote on its own.
type absURLMatcher struct {
	// match is the quote followed by '/', e.g. `"/`.
	match []byte
	// quote is the quote alone, e.g. `"`; the handlers write it back in
	// front of the rewritten URL.
	quote []byte
}
    75  
    76  // match check rune inside word. Will be != ' '.
    77  func (l *absurllexer) match(r rune) {
    78  
    79  	var found bool
    80  
    81  	// note, the prefixes can start off on the same foot, i.e.
    82  	// src and srcset.
    83  	if l.ms == matchStateWhitespace {
    84  		l.idx = 0
    85  		for j, p := range prefixes {
    86  			if r == p.r[l.idx] {
    87  				l.matches[j] = true
    88  				found = true
    89  				// checkMatchState will only return true when r=='=', so
    90  				// we can safely ignore the return value here.
    91  				l.checkMatchState(r, j)
    92  			}
    93  		}
    94  
    95  		if !found {
    96  			l.ms = matchStateNone
    97  		}
    98  
    99  		return
   100  	}
   101  
   102  	l.idx++
   103  	for j, m := range l.matches {
   104  		// still a match?
   105  		if m {
   106  			if prefixes[j].r[l.idx] == r {
   107  				found = true
   108  				if l.checkMatchState(r, j) {
   109  					return
   110  				}
   111  			} else {
   112  				l.matches[j] = false
   113  			}
   114  		}
   115  	}
   116  
   117  	if !found {
   118  		l.ms = matchStateNone
   119  	}
   120  }
   121  
   122  func (l *absurllexer) checkMatchState(r rune, idx int) bool {
   123  	if r == '=' {
   124  		l.ms = matchStateFull
   125  		for k := range l.matches {
   126  			if k != idx {
   127  				l.matches[k] = false
   128  			}
   129  		}
   130  		return true
   131  	}
   132  
   133  	l.ms = matchStatePartial
   134  
   135  	return false
   136  }
   137  
   138  func (l *absurllexer) emit() {
   139  	l.w.Write(l.content[l.start:l.pos])
   140  	l.start = l.pos
   141  }
   142  
   143  // handle URLs in src and href.
   144  func checkCandidateBase(l *absurllexer) {
   145  	for _, m := range l.matchers {
   146  		if !bytes.HasPrefix(l.content[l.pos:], m.match) {
   147  			continue
   148  		}
   149  		// check for schemaless URLs
   150  		posAfter := l.pos + len(m.match)
   151  		if posAfter >= len(l.content) {
   152  			return
   153  		}
   154  		r, _ := utf8.DecodeRune(l.content[posAfter:])
   155  		if r == '/' {
   156  			// schemaless: skip
   157  			return
   158  		}
   159  		if l.pos > l.start {
   160  			l.emit()
   161  		}
   162  		l.pos += len(m.match)
   163  		l.w.Write(m.quote)
   164  		l.w.Write(l.path)
   165  		l.start = l.pos
   166  	}
   167  }
   168  
   169  // handle URLs in srcset.
   170  func checkCandidateSrcset(l *absurllexer) {
   171  	// special case, not frequent (me think)
   172  	for _, m := range l.matchers {
   173  		if !bytes.HasPrefix(l.content[l.pos:], m.match) {
   174  			continue
   175  		}
   176  
   177  		// check for schemaless URLs
   178  		posAfter := l.pos + len(m.match)
   179  		if posAfter >= len(l.content) {
   180  			return
   181  		}
   182  		r, _ := utf8.DecodeRune(l.content[posAfter:])
   183  		if r == '/' {
   184  			// schemaless: skip
   185  			continue
   186  		}
   187  
   188  		posLastQuote := bytes.Index(l.content[l.pos+1:], m.quote)
   189  
   190  		// safe guard
   191  		if posLastQuote < 0 || posLastQuote > 2000 {
   192  			return
   193  		}
   194  
   195  		if l.pos > l.start {
   196  			l.emit()
   197  		}
   198  
   199  		section := l.content[l.pos+len(m.quote) : l.pos+posLastQuote+1]
   200  
   201  		fields := bytes.Fields(section)
   202  		l.w.Write(m.quote)
   203  		for i, f := range fields {
   204  			if f[0] == '/' {
   205  				l.w.Write(l.path)
   206  				l.w.Write(f[1:])
   207  
   208  			} else {
   209  				l.w.Write(f)
   210  			}
   211  
   212  			if i < len(fields)-1 {
   213  				l.w.Write([]byte(" "))
   214  			}
   215  		}
   216  
   217  		l.w.Write(m.quote)
   218  		l.pos += len(section) + (len(m.quote) * 2)
   219  		l.start = l.pos
   220  	}
   221  }
   222  
   223  // main loop
   224  func (l *absurllexer) replace() {
   225  	contentLength := len(l.content)
   226  	var r rune
   227  
   228  	for {
   229  		if l.pos >= contentLength {
   230  			l.width = 0
   231  			break
   232  		}
   233  
   234  		var width = 1
   235  		r = rune(l.content[l.pos])
   236  		if r >= utf8.RuneSelf {
   237  			r, width = utf8.DecodeRune(l.content[l.pos:])
   238  		}
   239  		l.width = width
   240  		l.pos += l.width
   241  		if r == ' ' {
   242  			l.ms = matchStateWhitespace
   243  		} else if l.ms != matchStateNone {
   244  			l.match(r)
   245  			if l.ms == matchStateFull {
   246  				var p *prefix
   247  				for i, m := range l.matches {
   248  					if m {
   249  						p = prefixes[i]
   250  						l.matches[i] = false
   251  					}
   252  				}
   253  				l.ms = matchStateNone
   254  				p.f(l)
   255  			}
   256  		}
   257  	}
   258  
   259  	// Done!
   260  	if l.pos > l.start {
   261  		l.emit()
   262  	}
   263  }
   264  
   265  func doReplace(path string, ct transform.FromTo, matchers []absURLMatcher) {
   266  
   267  	lexer := &absurllexer{
   268  		content:  ct.From().Bytes(),
   269  		w:        ct.To(),
   270  		path:     []byte(path),
   271  		matchers: matchers}
   272  
   273  	lexer.replace()
   274  }
   275  
// absURLReplacer holds the matcher sets for the two supported flavours of
// content.
type absURLReplacer struct {
	// htmlMatchers is used by replaceInHTML.
	htmlMatchers []absURLMatcher
	// xmlMatchers is used by replaceInXML.
	xmlMatchers  []absURLMatcher
}
   280  
   281  func newAbsURLReplacer() *absURLReplacer {
   282  
   283  	// HTML
   284  	dqHTMLMatch := []byte("\"/")
   285  	sqHTMLMatch := []byte("'/")
   286  
   287  	// XML
   288  	dqXMLMatch := []byte("&#34;/")
   289  	sqXMLMatch := []byte("&#39;/")
   290  
   291  	dqHTML := []byte("\"")
   292  	sqHTML := []byte("'")
   293  
   294  	dqXML := []byte("&#34;")
   295  	sqXML := []byte("&#39;")
   296  
   297  	return &absURLReplacer{
   298  		htmlMatchers: []absURLMatcher{
   299  			{dqHTMLMatch, dqHTML},
   300  			{sqHTMLMatch, sqHTML},
   301  		},
   302  		xmlMatchers: []absURLMatcher{
   303  			{dqXMLMatch, dqXML},
   304  			{sqXMLMatch, sqXML},
   305  		}}
   306  }
   307  
// replaceInHTML runs doReplace on ct with the HTML matchers, i.e. it looks
// for URLs opened by a literal double or single quote.
func (au *absURLReplacer) replaceInHTML(path string, ct transform.FromTo) {
	doReplace(path, ct, au.htmlMatchers)
}
   311  
// replaceInXML runs doReplace on ct with the XML matchers, i.e. it looks
// for URLs opened by a quote written as &#34; or &#39;.
func (au *absURLReplacer) replaceInXML(path string, ct transform.FromTo) {
	doReplace(path, ct, au.xmlMatchers)
}