// vim: noai:ts=2:sw=2

/*
 * Authors: Felipe A. Louza, Simon Gog, Guilherme P. Telles
 * contact: louza@ic.unicamp.br
 * 03/04/2017
 */

/*
 * This code is a modification of SACA-K algorithm by G. Nong, which can be
 * retrieved at: http://code.google.com/p/ge-nong/
 *
 * Our version of SACA-K, called gSACA-K, maintains the theoretical bounds of
 * the original algorithm to construct the generalized suffix array.
 *
 * Our algorithm gSACA-K can also compute the LCP-array and the Document-array
 * with no additional costs.
 *
 * gsacak(s, SA, NULL, NULL, n) //computes only SA
 * gsacak(s, SA, LCP,  NULL, n) //computes SA and LCP
 * gsacak(s, SA, NULL, DA, n)   //computes SA and DA
 * gsacak(s, SA, LCP,  DA, n)   //computes SA, LCP and DA
 *
 */

/******************************************************************************/

#ifndef GSACAK_H
#define GSACAK_H

#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>

/*
 * Two-argument maximum.
 *
 * Guarded so that including this header after another one that already
 * provides a `max` macro does not trigger an incompatible redefinition.
 * Each argument may be evaluated twice: do not pass expressions with side
 * effects (e.g. max(i++, j)).
 */
#ifndef max
#define max(a,b) ((a) > (b) ? (a) : (b))
#endif

/* Debug switch; defaults to 0 unless the build defines it. */
#ifndef DEBUG
#define DEBUG 0
#endif

/*
 * Width selector for the index types below:
 * non-zero -> 64-bit int_t/uint_t (default), zero -> 32-bit.
 */
#ifndef M64
#define M64 1
#endif

#if M64
typedef int64_t  int_t;
typedef uint64_t uint_t;
#define PRIdN PRId64
#define U_MAX UINT64_MAX
#define I_MAX INT64_MAX
#define I_MIN INT64_MIN
#else
typedef int32_t  int_t;
typedef uint32_t uint_t;
#define PRIdN PRId32
#define U_MAX UINT32_MAX
#define I_MAX INT32_MAX
#define I_MIN INT32_MIN
#endif

/*! @option type of s[0,n-1] for integer alphabets
 *
 *  @constraint sizeof(int_t) >= sizeof(int_text)
 */
typedef uint32_t int_text; //4N bytes for s[0..n-1]
#define PRIdT PRIu32       // unsigned conversion: int_text is an unsigned type

/*
 * Compile-time enforcement of the constraint documented above: the array
 * size becomes -1 (a compile error) if int_text is wider than int_t.
 */
typedef char gsacak_check_int_t_holds_int_text
    [(sizeof(int_t) >= sizeof(int_text)) ? 1 : -1];

/*! @option type for array DA
 */
typedef int32_t int_da;
#define PRIdA PRId32

/******************************************************************************/

/** @brief computes the suffix array of string s[0..n-1]
 *
 *  @param s   input string with s[n-1]=0
 *  @param SA  suffix array
 *  @param n   string length
 *  @return -1 if an error occurred, otherwise the depth of the recursive calls.
 */
int sacak(unsigned char *s, uint_t *SA, uint_t n);

/** @brief computes the suffix array of string s[0..n-1]
 *
 *  @param k   alphabet size+1 (0 is reserved)
 */
int sacak_int(int_text *s, uint_t *SA, uint_t n, uint_t k);

/******************************************************************************/

/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
 *
 *  @param s    input concatenated string, using separators s[i]=1 and with s[n-1]=0
 *  @param SA   Suffix array
 *  @param LCP  LCP array
 *  @param DA   Document array
 *  @param n    String length
 *
 *  @return depth of the recursive calls.
 */
int gsacak(unsigned char *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n);

/** @brief Computes the suffix array SA (LCP, DA) of T^cat in s[0..n-1]
 *
 *  @param s    input concatenated string, using separators s[i]=1 and with s[n-1]=0
 *  @param SA   Suffix array
 *  @param LCP  LCP array
 *  @param DA   Document array
 *  @param n    String length
 *  @param k    alphabet size+2 (0 and 1 are reserved)
 *
 *  @return depth of the recursive calls.
 */
int gsacak_int(int_text *s, uint_t *SA, int_t *LCP, int_da *DA, uint_t n, uint_t k);

/******************************************************************************/

/*
 * Lower-level entry points, declared here so they are visible outside the
 * implementation file. Their parameters are not documented in this header.
 * NOTE(review): presumably these are the recursive workers behind
 * sacak*() / gsacak*() (note the `level` argument) -- confirm against the
 * implementation before calling them directly.
 */
int_t SACA_K(int_t *s, uint_t *SA,
  uint_t n, unsigned int K,
  uint_t m, int cs, int level);

int_t gSACA_K(uint_t *s, uint_t *SA,
  uint_t n, unsigned int K,
  int cs, uint_t separator, int level);

#endif