github.com/ledgerwatch/erigon-lib@v1.0.0/sais/gsa/gsaca.go (about) 1 package gsa 2 3 /* 4 #include "gsacak.h" 5 #cgo CFLAGS: -DTERMINATOR=0 -DM64=1 -Dm64=1 -std=c99 6 */ 7 import "C" 8 import ( 9 "fmt" 10 "unsafe" 11 ) 12 13 // Implementation from https://github.com/felipelouza/gsufsort 14 // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf 15 // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf 16 func PrintArrays(str []byte, sa []uint, lcp []int, da []int32) { 17 // remove terminator 18 n := len(sa) - 1 19 sa = sa[1:] 20 lcp = lcp[1:] 21 da = da[1:] 22 23 fmt.Printf("i\t") 24 fmt.Printf("sa\t") 25 if lcp != nil { 26 fmt.Printf("lcp\t") 27 } 28 if da != nil { 29 fmt.Printf("gsa\t") 30 } 31 fmt.Printf("suffixes\t") 32 fmt.Printf("\n") 33 for i := 0; i < n; i++ { 34 fmt.Printf("%d\t", i) 35 fmt.Printf("%d\t", sa[i]) 36 if lcp != nil { 37 fmt.Printf("%d\t", lcp[i]) 38 } 39 40 if da != nil { // gsa 41 value := sa[i] 42 if da[i] != 0 { 43 value = sa[i] - sa[da[i]-1] - 1 44 } 45 fmt.Printf("(%d %d)\t", da[i], value) 46 } 47 //bwt 48 // char c = (SA[i])? T[SA[i]-1]-1:terminal; 49 // if(c==0) c = '$'; 50 // printf("%c\t",c); 51 52 for j := sa[i]; int(j) < n; j++ { 53 if str[j] == 1 { 54 fmt.Printf("$") 55 break 56 } else if str[j] == 0 { 57 fmt.Printf("#") 58 } else { 59 fmt.Printf("%c", str[j]-1) 60 } 61 } 62 fmt.Printf("\n") 63 } 64 } 65 66 // nolint 67 // SA2GSA - example func to convert SA+DA to GSA 68 func SA2GSA(sa []uint, da []int32) []uint { 69 // remove terminator 70 sa = sa[1:] 71 da = da[1:] 72 n := len(sa) - 1 73 74 gsa := make([]uint, n) 75 copy(gsa, sa) 76 77 for i := 0; i < n; i++ { 78 if da[i] != 0 { 79 gsa[i] = sa[i] - sa[da[i]-1] - 1 80 } 81 } 82 return gsa 83 } 84 85 func PrintRepeats(str []byte, sa []uint, da []int32) { 86 sa = sa[1:] 87 da = da[1:] 88 n := len(sa) - 1 89 var repeats int 90 for i := 0; i < len(da)-1; i++ { 91 repeats++ 92 if da[i] < da[i+1] { // same suffix 93 continue 94 } 95 96 // new suffix 97 fmt.Printf(" repeats: %d\t", repeats) 98 for j := sa[i]; int(j) < n; j++ { 99 if str[j] == 1 { 100 //fmt.Printf("$") 101 break 102 } else if str[j] == 0 { 103 fmt.Printf("#") 104 } else { 105 fmt.Printf("%c", str[j]-1) 106 } 107 } 108 fmt.Printf("\n") 109 110 repeats = 0 111 } 112 } 113 114 func GSA(data []byte, sa []uint, lcp []int, da []int32) error { 115 tPtr := unsafe.Pointer(&data[0]) // source "text" 116 var lcpPtr, saPtr, daPtr unsafe.Pointer 117 if sa != nil { 118 saPtr = unsafe.Pointer(&sa[0]) 119 } 120 if lcp != nil { 121 lcpPtr = unsafe.Pointer(&lcp[0]) 122 } 123 if da != nil { 124 daPtr = unsafe.Pointer(&da[0]) 125 } 126 depth := C.gsacak( 127 (*C.uchar)(tPtr), 128 (*C.uint_t)(saPtr), 129 (*C.int_t)(lcpPtr), 130 (*C.int_da)(daPtr), 131 C.uint_t(len(data)), 132 ) 133 _ = depth 134 return nil 135 } 136 137 func ConcatAll(R [][]byte) (str []byte, n int) { 138 for i := 0; i < len(R); i++ { 139 n += len(R[i]) + 1 140 } 141 142 n++ //add 0 at the end 143 str = make([]byte, n) 144 var l, max int 145 k := len(R) 146 147 for i := 0; i < k; i++ { 148 m := len(R[i]) 149 if m > max { 150 max = m 151 } 152 for j := 0; j < m; j++ { 153 if R[i][j] < 255 && R[i][j] > 1 { 154 str[l] = R[i][j] + 1 155 l++ 156 } 157 } 158 if m > 0 { 159 if str[l-1] > 1 { 160 str[l] = 1 161 l++ 162 } //add 1 as separator (ignores empty entries) 163 } 164 } 165 str[l] = 0 166 l++ 167 n = l 168 return str, n 169 }