github.com/ledgerwatch/erigon-lib@v1.0.0/sais/gsa/gsaca.go (about)

     1  package gsa
     2  
     3  /*
     4  #include "gsacak.h"
     5  #cgo CFLAGS: -DTERMINATOR=0 -DM64=1 -Dm64=1 -std=c99
     6  */
     7  import "C"
     8  import (
     9  	"fmt"
    10  	"unsafe"
    11  )
    12  
    13  // Implementation from https://github.com/felipelouza/gsufsort
    14  // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf
    15  // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf
    16  func PrintArrays(str []byte, sa []uint, lcp []int, da []int32) {
    17  	// remove terminator
    18  	n := len(sa) - 1
    19  	sa = sa[1:]
    20  	lcp = lcp[1:]
    21  	da = da[1:]
    22  
    23  	fmt.Printf("i\t")
    24  	fmt.Printf("sa\t")
    25  	if lcp != nil {
    26  		fmt.Printf("lcp\t")
    27  	}
    28  	if da != nil {
    29  		fmt.Printf("gsa\t")
    30  	}
    31  	fmt.Printf("suffixes\t")
    32  	fmt.Printf("\n")
    33  	for i := 0; i < n; i++ {
    34  		fmt.Printf("%d\t", i)
    35  		fmt.Printf("%d\t", sa[i])
    36  		if lcp != nil {
    37  			fmt.Printf("%d\t", lcp[i])
    38  		}
    39  
    40  		if da != nil { // gsa
    41  			value := sa[i]
    42  			if da[i] != 0 {
    43  				value = sa[i] - sa[da[i]-1] - 1
    44  			}
    45  			fmt.Printf("(%d %d)\t", da[i], value)
    46  		}
    47  		//bwt
    48  		//	char c = (SA[i])? T[SA[i]-1]-1:terminal;
    49  		//	if(c==0) c = '$';
    50  		//	printf("%c\t",c);
    51  
    52  		for j := sa[i]; int(j) < n; j++ {
    53  			if str[j] == 1 {
    54  				fmt.Printf("$")
    55  				break
    56  			} else if str[j] == 0 {
    57  				fmt.Printf("#")
    58  			} else {
    59  				fmt.Printf("%c", str[j]-1)
    60  			}
    61  		}
    62  		fmt.Printf("\n")
    63  	}
    64  }
    65  
    66  // nolint
    67  // SA2GSA - example func to convert SA+DA to GSA
    68  func SA2GSA(sa []uint, da []int32) []uint {
    69  	// remove terminator
    70  	sa = sa[1:]
    71  	da = da[1:]
    72  	n := len(sa) - 1
    73  
    74  	gsa := make([]uint, n)
    75  	copy(gsa, sa)
    76  
    77  	for i := 0; i < n; i++ {
    78  		if da[i] != 0 {
    79  			gsa[i] = sa[i] - sa[da[i]-1] - 1
    80  		}
    81  	}
    82  	return gsa
    83  }
    84  
    85  func PrintRepeats(str []byte, sa []uint, da []int32) {
    86  	sa = sa[1:]
    87  	da = da[1:]
    88  	n := len(sa) - 1
    89  	var repeats int
    90  	for i := 0; i < len(da)-1; i++ {
    91  		repeats++
    92  		if da[i] < da[i+1] { // same suffix
    93  			continue
    94  		}
    95  
    96  		// new suffix
    97  		fmt.Printf(" repeats: %d\t", repeats)
    98  		for j := sa[i]; int(j) < n; j++ {
    99  			if str[j] == 1 {
   100  				//fmt.Printf("$")
   101  				break
   102  			} else if str[j] == 0 {
   103  				fmt.Printf("#")
   104  			} else {
   105  				fmt.Printf("%c", str[j]-1)
   106  			}
   107  		}
   108  		fmt.Printf("\n")
   109  
   110  		repeats = 0
   111  	}
   112  }
   113  
   114  func GSA(data []byte, sa []uint, lcp []int, da []int32) error {
   115  	tPtr := unsafe.Pointer(&data[0]) // source "text"
   116  	var lcpPtr, saPtr, daPtr unsafe.Pointer
   117  	if sa != nil {
   118  		saPtr = unsafe.Pointer(&sa[0])
   119  	}
   120  	if lcp != nil {
   121  		lcpPtr = unsafe.Pointer(&lcp[0])
   122  	}
   123  	if da != nil {
   124  		daPtr = unsafe.Pointer(&da[0])
   125  	}
   126  	depth := C.gsacak(
   127  		(*C.uchar)(tPtr),
   128  		(*C.uint_t)(saPtr),
   129  		(*C.int_t)(lcpPtr),
   130  		(*C.int_da)(daPtr),
   131  		C.uint_t(len(data)),
   132  	)
   133  	_ = depth
   134  	return nil
   135  }
   136  
   137  func ConcatAll(R [][]byte) (str []byte, n int) {
   138  	for i := 0; i < len(R); i++ {
   139  		n += len(R[i]) + 1
   140  	}
   141  
   142  	n++ //add 0 at the end
   143  	str = make([]byte, n)
   144  	var l, max int
   145  	k := len(R)
   146  
   147  	for i := 0; i < k; i++ {
   148  		m := len(R[i])
   149  		if m > max {
   150  			max = m
   151  		}
   152  		for j := 0; j < m; j++ {
   153  			if R[i][j] < 255 && R[i][j] > 1 {
   154  				str[l] = R[i][j] + 1
   155  				l++
   156  			}
   157  		}
   158  		if m > 0 {
   159  			if str[l-1] > 1 {
   160  				str[l] = 1
   161  				l++
   162  			} //add 1 as separator (ignores empty entries)
   163  		}
   164  	}
   165  	str[l] = 0
   166  	l++
   167  	n = l
   168  	return str, n
   169  }