github.com/d-tsuji/suffixarray@v0.0.0-20200625031310-5b0c40604e73/suffixarray.go (about)

     1  package suffixarray
     2  
     3  import (
     4  	"strings"
     5  )
     6  
// Manber bundles an input text with the arrays used to build and query a
// suffix array over it. New allocates the arrays; Build fills them in.
type Manber struct {
	// length of input string, in bytes (New sets it to len(s))
	N int

	// input text (ASCII only)
	Text string

	// offset of ith string in order: Index[i] is the starting position in
	// Text of the suffix at sorted position i. It has N+1 entries; New sets
	// the sentinel Index[N] = N.
	Index []int

	// Rank of ith string: Rank[i] is the current rank of the suffix that
	// starts at position i. It has N+1 entries; New sets the sentinel
	// Rank[N] = -1, which is below every rank msd assigns.
	Rank []int

	// Rank of ith string (temporary): scratch space in which doit stages
	// updated ranks before copying them back into Rank.
	newrank []int

	// distance, in bytes, at which less compares the ranks of two suffixes;
	// doit doubles it on every pass.
	offset int
}
    25  
    26  // New creates a new Manber.
    27  func New(s string) *Manber {
    28  	n := len(s)
    29  	m := &Manber{
    30  		N:       n,
    31  		Text:    s,
    32  		Index:   make([]int, n+1),
    33  		Rank:    make([]int, n+1),
    34  		newrank: make([]int, n+1),
    35  	}
    36  	// sentinels
    37  	m.Index[n] = n
    38  	m.Rank[n] = -1
    39  
    40  	return m
    41  }
    42  
    43  // Build builds a SuffixArray.
    44  // Building time is O(N (logN)^2) where N is the
    45  // size of the input string data.
    46  func (m *Manber) Build() {
    47  	m.msd()
    48  	m.doit()
    49  }
    50  
    51  func (m *Manber) LookupAll(p string) []int {
    52  	var left, right int
    53  
    54  	// Find the maximum index where the result of strings.Compare is -1.
    55  	l := 0
    56  	r := m.N
    57  	for r-l > 1 {
    58  		mid := (l + r) >> 1
    59  		cmp := strings.Compare(m.Text[m.Index[mid]:min(m.Index[mid]+len(p), m.N)], p)
    60  		if cmp < 0 {
    61  			l = mid
    62  		} else {
    63  			r = mid
    64  		}
    65  	}
    66  	left = l
    67  
    68  	// Find the maximum index where the result of strings.Compare is 0.
    69  	l = 0
    70  	r = m.N
    71  	for r-l > 1 {
    72  		mid := (l + r) >> 1
    73  		cmp := strings.Compare(m.Text[m.Index[mid]:min(m.Index[mid]+len(p), m.N)], p)
    74  		if cmp <= 0 {
    75  			l = mid
    76  		} else {
    77  			r = mid
    78  		}
    79  	}
    80  	right = l
    81  
    82  	result := make([]int, 0, right-left)
    83  	for i := left + 1; i <= right; i++ {
    84  		result = append(result, m.Index[i])
    85  	}
    86  	return result
    87  }
    88  
    89  func (m *Manber) msd() {
    90  	const R int = 256
    91  
    92  	// calculate frequencies
    93  	freq := make([]int, R)
    94  	for i := 0; i < m.N; i++ {
    95  		freq[m.Text[i]]++
    96  	}
    97  
    98  	// calculate cumulative frequencies
    99  	cumm := make([]int, R)
   100  	for i := 1; i < R; i++ {
   101  		cumm[i] = cumm[i-1] + freq[i-1]
   102  	}
   103  
   104  	// compute ranks
   105  	for i := 0; i < m.N; i++ {
   106  		m.Rank[i] = cumm[m.Text[i]]
   107  	}
   108  
   109  	// sort by first char
   110  	for i := 0; i < m.N; i++ {
   111  		m.Index[cumm[m.Text[i]]] = i
   112  		cumm[m.Text[i]]++
   113  	}
   114  }
   115  
   116  func (m *Manber) doit() {
   117  	for m.offset = 1; m.offset < m.N; m.offset += m.offset {
   118  		var count int
   119  		for i := 1; i <= m.N; i++ {
   120  			if m.Rank[m.Index[i]] == m.Rank[m.Index[i-1]] {
   121  				count++
   122  			} else if count > 0 {
   123  				// sort
   124  				left := i - 1 - count
   125  				right := i - 1
   126  				m.quicksort(left, right)
   127  
   128  				// now fix up ranks
   129  				r := m.Rank[m.Index[left]]
   130  				for j := left + 1; j <= right; j++ {
   131  					if m.less(m.Index[j-1], m.Index[j]) {
   132  						r = m.Rank[m.Index[left]] + j - left
   133  					}
   134  					m.newrank[m.Index[j]] = r
   135  				}
   136  
   137  				// copy back - note can't update rank too eagerly
   138  				for j := left + 1; j <= right; j++ {
   139  					m.Rank[m.Index[j]] = m.newrank[m.Index[j]]
   140  				}
   141  
   142  				count = 0
   143  			}
   144  		}
   145  	}
   146  }
   147  
   148  // -----------------------------------------
   149  // Helper functions for comparing suffixes.
   150  // -----------------------------------------
   151  
   152  func (m *Manber) quicksort(lo, hi int) {
   153  	if hi <= lo {
   154  		return
   155  	}
   156  	i := m.partition(lo, hi)
   157  	m.quicksort(lo, i-1)
   158  	m.quicksort(i+1, hi)
   159  }
   160  
   161  func (m *Manber) partition(lo, hi int) int {
   162  	i, j, v := lo-1, hi, m.Index[hi]
   163  	for {
   164  		// find item on left to swap
   165  		i++
   166  		for m.less(m.Index[i], v) {
   167  			if i == hi {
   168  				break
   169  			}
   170  			i++
   171  		}
   172  
   173  		// find item on right to swap
   174  		j--
   175  		for m.less(v, m.Index[j]) {
   176  			if j == lo {
   177  				break
   178  			}
   179  			j--
   180  		}
   181  
   182  		// check if pointers cross
   183  		if i >= j {
   184  			break
   185  		}
   186  		m.exch(i, j)
   187  	}
   188  
   189  	// swap with partition element
   190  	m.exch(i, hi)
   191  
   192  	return i
   193  }
   194  
   195  func (m *Manber) exch(i, j int) {
   196  	m.Index[i], m.Index[j] = m.Index[j], m.Index[i]
   197  }
   198  
   199  func (m *Manber) less(v, w int) bool {
   200  	return m.Rank[v+m.offset] < m.Rank[w+m.offset]
   201  }
   202  
   203  func min(a, b int) int {
   204  	if a < b {
   205  		return a
   206  	} else {
   207  		return b
   208  	}
   209  }