github.com/geniusesgroup/libgo@v0.0.0-20220713101832-828057a9d3d4/matn/phrase.go (about) 1 /* For license and copyright information please see LEGAL file in repository */ 2 3 package matn 4 5 import ( 6 "golang.org/x/crypto/sha3" 7 8 "../ganjine" 9 "../json" 10 "../pehrest" 11 "../protocol" 12 "../syllab" 13 "../time/utc" 14 ) 15 16 const indexPhraseStructureID uint64 = 736712670881955651 17 18 var indexPhraseStructure = ds.DataStructure{ 19 URN: "urn:giti:matn.protocol:data-structure:index-phrase", 20 ID: indexPhraseStructureID, 21 IssueDate: 1608786641, 22 ExpiryDate: 0, 23 ExpireInFavorOf: "", 24 Status: protocol.Software_PreAlpha, 25 Structure: IndexPhrase{}, 26 27 Name: map[protocol.LanguageID]string{ 28 protocol.LanguageEnglish: "Index Phrase", 29 }, 30 Description: map[protocol.LanguageID]string{ 31 protocol.LanguageEnglish: "Store the phrase index", 32 }, 33 TAGS: []string{ 34 "", 35 }, 36 } 37 38 // IndexPhrase is standard structure to store any hash byte index!! 39 // It is simple secondary index e.g. hash("user@email.com") 40 type IndexPhrase struct { 41 RecordID [32]byte 42 Terms []string `index-hash:"RecordID[pair,PageNumber]"` // Array must be order to retrievable! 43 PageNumber uint64 44 Tokens [10]PhraseToken // Order of PhraseTokens index changed by algorithm in exact period of time! 45 } 46 47 // PhraseToken store detail about a word in the record to index 48 type PhraseToken struct { 49 RecordID [32]byte `json:",string"` 50 RecordStructureID uint64 51 RecordFieldID uint8 52 RecordPrimaryKey [32]byte `json:",string"` // Store any primary ID or any data up to 32 byte length 53 // Don't need Snippet text due to it is not web search engine! 54 } 55 56 // SaveNew method set some data and write entire IndexPhrase record with all indexes! 57 func (ip *IndexPhrase) SaveNew() (err protocol.Error) { 58 err = ip.Set() 59 if err != nil { 60 return 61 } 62 ip.IndexRecordIDForTermsPageNumber() 63 return 64 } 65 66 // Set method set some data and write entire IndexPhrase record! 67 func (ip *IndexPhrase) Set() (err protocol.Error) { 68 ip.RecordStructureID = indexPhraseStructureID 69 ip.RecordSize = ip.LenAsSyllab() 70 ip.WriteTime = utc.Now() 71 ip.OwnerAppID = protocol.OS.AppManifest().AppUUID() 72 73 var req = ganjine.SetRecordReq{ 74 Type: ganjine.RequestTypeBroadcast, 75 Record: ip.ToSyllab(), 76 } 77 ip.RecordID = sha3.Sum256(req.Record[32:]) 78 copy(req.Record[0:], ip.RecordID[:]) 79 80 err = gsdk.SetRecord(&req) 81 return 82 } 83 84 // GetByRecordID method read all existing record data by given RecordID! 85 func (ip *IndexPhrase) GetByRecordID() (err protocol.Error) { 86 var req = ganjine.GetRecordReq{ 87 RecordID: ip.RecordID, 88 RecordStructureID: indexPhraseStructureID, 89 } 90 var res *ganjine.GetRecordRes 91 res, err = gsdk.GetRecord(&req) 92 if err != nil { 93 return 94 } 95 96 err = ip.FromSyllab(res.Record) 97 if err != nil { 98 return 99 } 100 101 if ip.RecordStructureID != indexPhraseStructureID { 102 err = ganjine.ErrMisMatchedStructureID 103 } 104 return 105 } 106 107 /* 108 -- Search Methods -- 109 */ 110 111 // FindRecordsIDByTermsPageNumber find RecordsID by given Terms+PageNumber 112 func (ip *IndexPhrase) FindRecordsIDByTermsPageNumber(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) { 113 var indexReq = &pehrest.HashGetValuesReq{ 114 IndexKey: ip.hashTermsPageNumberForRecordID(), 115 Offset: offset, 116 Limit: limit, 117 } 118 var indexRes *pehrest.HashGetValuesRes 119 indexRes, err = psdk.HashGetValues(indexReq) 120 RecordsID = indexRes.IndexValues 121 return 122 } 123 124 /* 125 -- PRIMARY INDEXES -- 126 */ 127 128 // IndexRecordIDForTermsPageNumber save RecordID chain for Terms+PageNumber 129 // Call in each update to the exiting record! 130 func (ip *IndexPhrase) IndexRecordIDForTermsPageNumber() { 131 var indexRequest = pehrest.HashSetValueReq{ 132 Type: ganjine.RequestTypeBroadcast, 133 IndexKey: ip.hashTermsPageNumberForRecordID(), 134 IndexValue: ip.RecordID, 135 } 136 var err = psdk.HashSetValue(&indexRequest) 137 if err != nil { 138 // TODO::: we must retry more due to record wrote successfully! 139 } 140 } 141 142 func (ip *IndexPhrase) hashTermsPageNumberForRecordID() (hash [32]byte) { 143 const field = "TermsPageNumber" 144 var bufLen = 16 + len(field) 145 for _, t := range ip.Terms { 146 bufLen += len(t) 147 } 148 var buf = make([]byte, bufLen) 149 syllab.SetUInt64(buf, 0, indexPhraseStructureID) 150 syllab.SetUInt64(buf, 8, ip.PageNumber) 151 copy(buf[16:], field) 152 bufLen = 16 + len(field) 153 for _, t := range ip.Terms { 154 copy(buf[bufLen:], t) 155 bufLen += len(t) 156 } 157 return sha3.Sum256(buf[:]) 158 } 159 160 /* 161 -- Syllab Encoder & Decoder -- 162 */ 163 164 func (ip *IndexPhrase) FromSyllab(payload []byte, stackIndex uint32) { 165 if uint32(len(payload)) < ip.LenOfSyllabStack() { 166 err = syllab.ErrShortArrayDecode 167 return 168 } 169 170 ip.Terms = syllab.UnsafeGetStringArray(payload, 88) 171 ip.PageNumber = syllab.GetUInt64(payload, 96) 172 173 var si uint32 = 104 174 for i := 0; i < 10; i++ { 175 ip.Tokens[i].FromSyllab(payload, si) 176 si += uint32(ip.Tokens[i].LenAsSyllab()) 177 } 178 return 179 } 180 181 func (ip *IndexPhrase) ToSyllab(payload []byte, stackIndex, heapIndex uint32) (freeHeapIndex uint32) { 182 heapIndex = syllab.SetStringArray(payload, ip.Terms, 88, heapIndex) 183 syllab.SetUInt64(payload, 96, ip.PageNumber) 184 185 var si uint32 = 104 186 for i := 0; i < 10; i++ { 187 ip.Tokens[i].ToSyllab(payload, si) 188 si += uint32(ip.Tokens[i].LenAsSyllab()) 189 } 190 return heapIndex 191 } 192 193 func (ip *IndexPhrase) LenOfSyllabStack() (ln uint32) { 194 ln = 104 195 ln += uint32(len(ip.Tokens)) * ip.Tokens[0].LenOfSyllabStack() 196 return 197 } 198 199 func (ip *IndexPhrase) LenOfSyllabHeap() (ln uint32) { 200 for i := 0; i < len(ip.Terms); i++ { 201 ln += uint32(len(ip.Terms[i])) 202 } 203 // ln += uint32(len(ip.Tokens)) * ip.Tokens[0].LenOfSyllabHeap 204 return 205 } 206 207 func (ip *IndexPhrase) LenAsSyllab() uint64 { 208 return uint64(ip.LenOfSyllabStack() + ip.LenOfSyllabHeap()) 209 } 210 211 /* 212 -- PhraseToken Encoder & Decoder -- 213 */ 214 215 func (pt *PhraseToken) FromSyllab(buf []byte, stackIndex uint32) { 216 copy(pt.RecordID[:], buf[stackIndex:]) 217 pt.RecordStructureID = syllab.GetUInt64(buf, stackIndex+32) 218 pt.RecordFieldID = syllab.GetUInt8(buf, stackIndex+40) 219 copy(pt.RecordPrimaryKey[:], buf[stackIndex+41:]) 220 } 221 222 func (pt *PhraseToken) ToSyllab(buf []byte, stackIndex uint32) { 223 copy(buf[0:], pt.RecordID[stackIndex:]) 224 syllab.SetUInt64(buf, stackIndex+32, pt.RecordStructureID) 225 syllab.SetUInt8(buf, stackIndex+40, pt.RecordFieldID) 226 copy(buf[stackIndex+41:], pt.RecordPrimaryKey[:]) 227 } 228 229 func (pt *PhraseToken) LenOfSyllabStack() uint32 { 230 return 73 231 } 232 233 func (pt *PhraseToken) LenOfSyllabHeap() (ln uint32) { 234 return 235 } 236 237 func (pt *PhraseToken) LenAsSyllab() uint64 { 238 return uint64(pt.LenOfSyllabStack() + pt.LenOfSyllabHea) 239 } 240 241 func (pt *PhraseToken) FromJSON(decoder *json.DecoderUnsafeMinified) (err protocol.Error) { 242 for err == nil { 243 var keyName = decoder.DecodeKey() 244 switch keyName { 245 case "RecordID": 246 err = decoder.DecodeByteArrayAsBase64(pt.RecordID[:]) 247 case "RecordStructureID": 248 pt.RecordStructureID, err = decoder.DecodeUInt64() 249 case "RecordFieldID": 250 pt.RecordFieldID, err = decoder.DecodeUInt8() 251 case "RecordPrimaryKey": 252 err = decoder.DecodeByteArrayAsBase64(pt.RecordPrimaryKey[:]) 253 default: 254 err = decoder.NotFoundKeyStrict() 255 } 256 257 if decoder.End() { 258 return 259 } 260 } 261 return 262 } 263 264 func (pt *PhraseToken) ToJSON(payload []byte) []byte { 265 var encoder = json.Encoder{Buf: payload} 266 267 encoder.EncodeString(`{"RecordID":"`) 268 encoder.EncodeByteSliceAsBase64(pt.RecordID[:]) 269 270 encoder.EncodeString(`","RecordStructureID":`) 271 encoder.EncodeUInt64(pt.RecordStructureID) 272 273 encoder.EncodeString(`,"RecordFieldID":`) 274 encoder.EncodeUInt8(pt.RecordFieldID) 275 276 encoder.EncodeString(`,"RecordPrimaryKey":"`) 277 encoder.EncodeByteSliceAsBase64(pt.RecordPrimaryKey[:]) 278 279 encoder.EncodeString(`"}`) 280 return encoder.Buf 281 } 282 283 func (pt *PhraseToken) LenAsJSON() (ln int) { 284 ln = 184 285 return 286 }