github.com/graybobo/golang.org-package-offline-cache@v0.0.0-20200626051047-6608995c132f/x/text/search/pattern.go (about)

     1  // Copyright 2015 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package search
     6  
     7  import (
     8  	"golang.org/x/text/collate/colltab"
     9  	newcolltab "golang.org/x/text/internal/colltab"
    10  )
    11  
    12  // TODO: handle variable primary weights?
    13  
    14  func (p *Pattern) deleteEmptyElements() {
    15  	k := 0
    16  	for _, e := range p.ce {
    17  		if !isIgnorable(p.m, e) {
    18  			p.ce[k] = e
    19  			k++
    20  		}
    21  	}
    22  	p.ce = p.ce[:k]
    23  }
    24  
    25  func isIgnorable(m *Matcher, e colltab.Elem) bool {
    26  	if e.Primary() > 0 {
    27  		return false
    28  	}
    29  	if e.Secondary() > 0 {
    30  		if !m.ignoreDiacritics {
    31  			return false
    32  		}
    33  		// Primary value is 0 and ignoreDiacritics is true. In this case we
    34  		// ignore the tertiary element, as it only pertains to the modifier.
    35  		return true
    36  	}
    37  	// TODO: further distinguish once we have the new implementation.
    38  	if !(m.ignoreWidth || m.ignoreCase) && e.Tertiary() > 0 {
    39  		return false
    40  	}
    41  	// TODO: we ignore the Quaternary level for now.
    42  	return true
    43  }
    44  
    45  // TODO: Use a Boyer-Moore-like algorithm (probably Sunday) for searching.
    46  
    47  func (p *Pattern) forwardSearch(it *newcolltab.Iter) (start, end int) {
    48  	for start := 0; it.Next(); it.Reset(start) {
    49  		nextStart := it.End()
    50  		if end := p.searchOnce(it); end != -1 {
    51  			return start, end
    52  		}
    53  		start = nextStart
    54  	}
    55  	return -1, -1
    56  }
    57  
    58  func (p *Pattern) anchoredForwardSearch(it *newcolltab.Iter) (start, end int) {
    59  	if it.Next() {
    60  		if end := p.searchOnce(it); end != -1 {
    61  			return 0, end
    62  		}
    63  	}
    64  	return -1, -1
    65  }
    66  
    67  // next advances to the next weight in a pattern. f must return one of the
    68  // weights of a collation element. next will advance to the first non-zero
    69  // weight and return this weight and true if it exists, or 0, false otherwise.
    70  func (p *Pattern) next(i *int, f func(colltab.Elem) int) (weight int, ok bool) {
    71  	for *i < len(p.ce) {
    72  		v := f(p.ce[*i])
    73  		*i++
    74  		if v != 0 {
    75  			// Skip successive ignorable values.
    76  			for ; *i < len(p.ce) && f(p.ce[*i]) == 0; *i++ {
    77  			}
    78  			return v, true
    79  		}
    80  	}
    81  	return 0, false
    82  }
    83  
    84  // TODO: remove this function once Elem is internal and Tertiary returns int.
    85  func tertiary(e colltab.Elem) int {
    86  	return int(e.Tertiary())
    87  }
    88  
// searchOnce tries to match the pattern p against the collation elements
// produced by it, starting with the segment currently loaded in the iterator.
// The caller must already have called it.Next successfully, so that
// it.Elems[:it.N] holds the collation elements of the first segment.
//
// The primary, secondary and tertiary weights are matched as independent
// sequences: for each level, every non-zero weight found in the text must
// equal the next non-zero weight of that level in the pattern. The secondary
// level is skipped when m.ignoreDiacritics is set and the tertiary level is
// skipped when m.ignoreWidth or m.ignoreCase is set.
//
// It returns the end position of the match as reported by it.End, or -1 if
// the text does not match at this position.
func (p *Pattern) searchOnce(it *newcolltab.Iter) (end int) {
	// pLevel[l] is the cursor into p.ce for weight level l (0: primary,
	// 1: secondary, 2: tertiary). Index 3 is not used by the code below.
	var pLevel [4]int

	m := p.m
	for {
		// Compare every collation element of the current segment against the
		// pattern, one level at a time.
		k := 0
		for ; k < it.N; k++ {
			if v := it.Elems[k].Primary(); v > 0 {
				if w, ok := p.next(&pLevel[0], colltab.Elem.Primary); !ok || v != w {
					// Pattern exhausted at this level or weights differ.
					return -1
				}
			}

			if !m.ignoreDiacritics {
				if v := it.Elems[k].Secondary(); v > 0 {
					if w, ok := p.next(&pLevel[1], colltab.Elem.Secondary); !ok || v != w {
						return -1
					}
				}
			} else if it.Elems[k].Primary() == 0 {
				// Diacritics are ignored and this element has no primary
				// weight, so its tertiary value is ignored as well.
				continue
			}

			// TODO: distinguish between case and width. This will be easier to
			// implement after we moved to the new collation implementation.
			if !m.ignoreWidth && !m.ignoreCase {
				if v := it.Elems[k].Tertiary(); v > 0 {
					if w, ok := p.next(&pLevel[2], tertiary); !ok || int(v) != w {
						return -1
					}
				}
			}
			// TODO: check quaternary weight
		}
		it.Discard() // Remove the current segment from the buffer.

		// Check for completion: the pattern is fully matched once the cursor
		// of every level that is being compared has reached the end of p.ce.
		switch {
		// If any of these cases match, we are not at the end.
		case pLevel[0] < len(p.ce):
		case !m.ignoreDiacritics && pLevel[1] < len(p.ce):
		case !(m.ignoreWidth || m.ignoreCase) && pLevel[2] < len(p.ce):
		default:
			// At this point, both the segment and the pattern have matched
			// fully. However, the text may still have trailing modifiers.
			// This can be verified by another call to it.Next.
			end = it.End()
			// NOTE(review): this assumes it.Next returning true implies that
			// it.Elems holds at least one element — confirm against Iter.
			if it.Next() && it.Elems[0].Primary() == 0 {
				if !m.ignoreDiacritics {
					// A trailing modifier is significant here and is not part
					// of the pattern, so this is not a match.
					return -1
				}
				// Diacritics are ignored: extend the match over the segment
				// that starts with the modifier.
				end = it.End()
			}
			return end
		}

		// Fill the buffer with the next batch of collation elements. If there
		// are none, the text ended before the pattern was fully matched.
		if !it.Next() {
			return -1
		}
	}
}
   156  }