github.com/geniusesgroup/libgo@v0.0.0-20220713101832-828057a9d3d4/matn/word.go (about) 1 /* For license and copyright information please see LEGAL file in repository */ 2 3 package matn 4 5 import ( 6 "golang.org/x/crypto/sha3" 7 8 "../ganjine" 9 "../object" 10 "../pehrest" 11 "../protocol" 12 "../syllab" 13 "../time" 14 ) 15 16 const indexWordStructureID uint64 = 17895727148230071652 17 18 var indexWordStructure = ds.DataStructure{ 19 URN: "urn:giti:matn.protocol:data-structure:index-word", 20 ID: 17895727148230071652, 21 IssueDate: 1608786632, 22 ExpiryDate: 0, 23 ExpireInFavorOf: "", 24 Status: protocol.Software_PreAlpha, 25 Structure: IndexWord{}, 26 27 Name: map[protocol.LanguageID]string{ 28 protocol.LanguageEnglish: "Index Word", 29 }, 30 Description: map[protocol.LanguageID]string{ 31 protocol.LanguageEnglish: "store the word index data", 32 }, 33 TAGS: []string{ 34 "", 35 }, 36 } 37 38 // IndexWord is standard structure to store the word index data! 39 type IndexWord struct { 40 Word string `index-hash:"RecordID,RecordID[pair,RecordStructure],RecordID[pair,RecordSecondaryKey],RecordID[pair,RecordOwnerID]"` // Order of recordIDs index changed by algorithm in exact period of time! 41 RecordStructure uint64 42 RecordPrimaryKey [16]byte // Store any primary ID or any data up to 16 byte length e.g. ID 43 RecordSecondaryKey [16]byte // Store any secondary ID or any data up to 16 byte length e.g. GroupID 44 RecordOwnerID [16]byte 45 // It is array because may be more than one location in object have this word 46 Tokens []WordToken 47 } 48 49 // WordToken store detail about a word in the record to index 50 type WordToken struct { 51 RecordID [16]byte `json:",string"` 52 RecordFieldID uint8 53 WordType WordType 54 WordOffsetInSentence uint64 // Position of the word in the sentence 55 WordOffsetInText uint64 // Position of the word in the text 56 OffsetInSentence uint64 // First word charecter possition in the sentence 57 OffsetInText uint64 // First word charecter possition in the text 58 } 59 60 // SaveNew method set some data and write entire IndexWord record with all indexes! 61 func (iw *IndexWord) SaveNew() (err protocol.Error) { 62 err = iw.Set() 63 if err != nil { 64 return 65 } 66 iw.IndexRecordIDForWord() 67 iw.IndexRecordIDForWordRecordStructure() 68 if iw.RecordSecondaryKey != [32]byte{} { 69 iw.IndexRecordIDForWordRecordSecondaryKey() 70 } 71 iw.IndexRecordIDForWordRecordOwnerID() 72 return 73 } 74 75 // SaveOrUpdate method set some data and write entire IndexWord record with all indexes or update exiting one! 76 func (iw *IndexWord) SaveOrUpdate() (err protocol.Error) { 77 var check = IndexWord{ 78 Word: iw.Word, 79 RecordPrimaryKey: iw.RecordPrimaryKey, 80 } 81 err = check.GetByWordRecordPrimaryKey() 82 if err.Equal(object.ErrNotExist) { 83 err = iw.SaveNew() 84 } else if err != nil { 85 // TODO::: handle error 86 } else { 87 iw.Tokens = append(iw.Tokens, make([]WordToken, 0, len(iw.Tokens)+len(check.Tokens))...) 88 for _, token := range check.Tokens { 89 // TODO::: need to check first by RecordID?? 90 iw.Tokens = append(iw.Tokens, token) 91 } 92 err = iw.Set() 93 } 94 return 95 } 96 97 // Set method set some data and write entire IndexWord record! 98 func (iw *IndexWord) Set() (err protocol.Error) { 99 iw.RecordID = iw.hashWordRecordPrimaryKeyForRecordID() 100 iw.RecordStructureID = indexWordStructureID 101 iw.RecordSize = iw.LenAsSyllab() 102 iw.WriteTime = time.Now() 103 iw.OwnerAppID = protocol.OS.AppManifest().AppUUID() 104 105 var req = ganjine.SetRecordReq{ 106 Type: ganjine.RequestTypeBroadcast, 107 Record: iw.ToSyllab(), 108 } 109 err = gsdk.SetRecord(&req) 110 return 111 } 112 113 func (iw *IndexWord) hashWordRecordPrimaryKeyForRecordID() (hash [32]byte) { 114 const field = "WordRecordPrimaryKey" 115 var buf = make([]byte, 40+len(field)+len(iw.Word)) // 8+32 116 syllab.SetUInt64(buf, 0, indexWordStructureID) 117 copy(buf[8:], iw.RecordPrimaryKey[:]) 118 copy(buf[40:], field) 119 copy(buf[40+len(field):], iw.Word) 120 return sha3.Sum256(buf) 121 } 122 123 // GetByRecordID method read all existing record data by given RecordID! 124 func (iw *IndexWord) GetByRecordID() (err protocol.Error) { 125 var req = ganjine.GetRecordReq{ 126 RecordID: iw.RecordID, 127 RecordStructureID: indexWordStructureID, 128 } 129 var res *ganjine.GetRecordRes 130 res, err = gsdk.GetRecord(&req) 131 if err != nil { 132 return 133 } 134 135 err = iw.FromSyllab(res.Record) 136 if err != nil { 137 return 138 } 139 140 if iw.RecordStructureID != indexWordStructureID { 141 err = ganjine.ErrMisMatchedStructureID 142 } 143 return 144 } 145 146 // GetByWordRecordPrimaryKey find RecordsID by given Word+RecordPrimaryKey 147 func (iw *IndexWord) GetByWordRecordPrimaryKey() (err protocol.Error) { 148 iw.RecordID = iw.hashWordRecordPrimaryKeyForRecordID() 149 err = iw.GetByRecordID() 150 return 151 } 152 153 /* 154 -- Search Methods -- 155 */ 156 157 // FindRecordsIDByWord find RecordsID by given ID 158 func (iw *IndexWord) FindRecordsIDByWord(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) { 159 var indexReq = &pehrest.HashGetValuesReq{ 160 IndexKey: iw.hashWordforRecordID(), 161 Offset: offset, 162 Limit: limit, 163 } 164 var indexRes *pehrest.HashGetValuesRes 165 indexRes, err = psdk.HashGetValues(indexReq) 166 RecordsID = indexRes.IndexValues 167 return 168 } 169 170 // FindRecordsIDByWordRecordStructure find RecordsID by given Word+RecordStructure 171 func (iw *IndexWord) FindRecordsIDByWordRecordStructure(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) { 172 var indexReq = &pehrest.HashGetValuesReq{ 173 IndexKey: iw.hashWordRecordStructureForRecordID(), 174 Offset: offset, 175 Limit: limit, 176 } 177 var indexRes *pehrest.HashGetValuesRes 178 indexRes, err = psdk.HashGetValues(indexReq) 179 RecordsID = indexRes.IndexValues 180 return 181 } 182 183 // FindRecordsIDByWordSecondaryKey find RecordsID by given Word+SecondaryKey 184 func (iw *IndexWord) FindRecordsIDByWordSecondaryKey(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) { 185 var indexReq = &pehrest.HashGetValuesReq{ 186 IndexKey: iw.hashWordRecordSecondaryKeyForRecordID(), 187 Offset: offset, 188 Limit: limit, 189 } 190 var indexRes *pehrest.HashGetValuesRes 191 indexRes, err = psdk.HashGetValues(indexReq) 192 RecordsID = indexRes.IndexValues 193 return 194 } 195 196 // FindRecordsIDByWordRecordOwnerID find RecordsID by given Word+RecordOwnerID 197 func (iw *IndexWord) FindRecordsIDByWordRecordOwnerID(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) { 198 var indexReq = &pehrest.HashGetValuesReq{ 199 IndexKey: iw.hashWordRecordOwnerIDForRecordID(), 200 Offset: offset, 201 Limit: limit, 202 } 203 var indexRes *pehrest.HashGetValuesRes 204 indexRes, err = psdk.HashGetValues(indexReq) 205 RecordsID = indexRes.IndexValues 206 return 207 } 208 209 // FindByWordRecordStructure find by given Word+RecordStructure 210 func (iw *IndexWord) FindByWordRecordStructure(offset, limit uint64) (phraseTokens []PhraseToken, err protocol.Error) { 211 var indexReq = &pehrest.HashGetValuesReq{ 212 IndexKey: iw.hashWordRecordStructureForRecordID(), 213 Offset: offset, 214 Limit: limit, 215 } 216 var indexRes *pehrest.HashGetValuesRes 217 indexRes, err = psdk.HashGetValues(indexReq) 218 var RecordsID = indexRes.IndexValues 219 220 phraseTokens = make([]PhraseToken, len(RecordsID)) 221 for i := 0; i < len(RecordsID); i++ { 222 iw.RecordID = RecordsID[i] 223 iw.GetByRecordID() 224 225 phraseTokens[i] = PhraseToken{ 226 RecordID: iw.Tokens[len(iw.Tokens)-1].RecordID, 227 RecordStructureID: iw.RecordStructure, 228 RecordFieldID: iw.Tokens[len(iw.Tokens)-1].RecordFieldID, 229 RecordPrimaryKey: iw.RecordPrimaryKey, 230 } 231 } 232 return 233 } 234 235 /* 236 -- PRIMARY INDEXES -- 237 */ 238 239 // IndexRecordIDForWord save RecordID chain for ID+Language 240 // Call in each update to the exiting record! 241 func (iw *IndexWord) IndexRecordIDForWord() { 242 var indexRequest = pehrest.HashSetValueReq{ 243 Type: ganjine.RequestTypeBroadcast, 244 IndexKey: iw.hashWordforRecordID(), 245 IndexValue: iw.RecordID, 246 } 247 var err = psdk.HashSetValue(&indexRequest) 248 if err != nil { 249 // TODO::: we must retry more due to record wrote successfully! 250 } 251 } 252 253 func (iw *IndexWord) hashWordforRecordID() (hash [32]byte) { 254 const field = "Word" 255 var buf = make([]byte, 8+len(field)+len(iw.Word)) 256 syllab.SetUInt64(buf, 0, indexWordStructureID) 257 copy(buf[8:], field) 258 copy(buf[8+len(field):], iw.Word) 259 return sha3.Sum256(buf[:]) 260 } 261 262 /* 263 -- SECONDARY INDEXES -- 264 */ 265 266 // IndexRecordIDForWordRecordStructure save RecordID chain for Word+RecordStructure 267 // Don't call in update to an exiting record! 268 func (iw *IndexWord) IndexRecordIDForWordRecordStructure() { 269 var indexRequest = pehrest.HashSetValueReq{ 270 Type: ganjine.RequestTypeBroadcast, 271 IndexKey: iw.hashWordRecordStructureForRecordID(), 272 IndexValue: iw.RecordID, 273 } 274 var err = psdk.HashSetValue(&indexRequest) 275 if err != nil { 276 // TODO::: we must retry more due to record wrote successfully! 277 } 278 } 279 280 func (iw *IndexWord) hashWordRecordStructureForRecordID() (hash [32]byte) { 281 const field = "WordRecordStructure" 282 var buf = make([]byte, 16+len(field)+len(iw.Word)) // 8+8 283 syllab.SetUInt64(buf, 0, indexWordStructureID) 284 syllab.SetUInt64(buf, 8, iw.RecordStructure) 285 copy(buf[16:], field) 286 copy(buf[16+len(field):], iw.Word) 287 return sha3.Sum256(buf) 288 } 289 290 // IndexRecordIDForWordRecordSecondaryKey save RecordID chain for Word+RecordSecondaryKey 291 // Don't call in update to an exiting record! 292 func (iw *IndexWord) IndexRecordIDForWordRecordSecondaryKey() { 293 var indexRequest = pehrest.HashSetValueReq{ 294 Type: ganjine.RequestTypeBroadcast, 295 IndexKey: iw.hashWordRecordSecondaryKeyForRecordID(), 296 IndexValue: iw.RecordID, 297 } 298 var err = psdk.HashSetValue(&indexRequest) 299 if err != nil { 300 // TODO::: we must retry more due to record wrote successfully! 301 } 302 } 303 304 func (iw *IndexWord) hashWordRecordSecondaryKeyForRecordID() (hash [32]byte) { 305 const field = "WordRecordSecondaryKey" 306 var buf = make([]byte, 40+len(field)+len(iw.Word)) // 8+32 307 syllab.SetUInt64(buf, 0, indexWordStructureID) 308 copy(buf[8:], iw.RecordSecondaryKey[:]) 309 copy(buf[40:], field) 310 copy(buf[40+len(field):], iw.Word) 311 return sha3.Sum256(buf) 312 } 313 314 // IndexRecordIDForWordRecordOwnerID save RecordID chain for Word+RecordOwnerID 315 // Don't call in update to an exiting record! 316 func (iw *IndexWord) IndexRecordIDForWordRecordOwnerID() { 317 var indexRequest = pehrest.HashSetValueReq{ 318 Type: ganjine.RequestTypeBroadcast, 319 IndexKey: iw.hashWordRecordOwnerIDForRecordID(), 320 IndexValue: iw.RecordID, 321 } 322 var err = psdk.HashSetValue(&indexRequest) 323 if err != nil { 324 // TODO::: we must retry more due to record wrote successfully! 325 } 326 } 327 328 func (iw *IndexWord) hashWordRecordOwnerIDForRecordID() (hash [32]byte) { 329 const field = "WordRecordOwnerID" 330 var buf = make([]byte, 40+len(field)+len(iw.Word)) // 8+32 331 syllab.SetUInt64(buf, 0, indexWordStructureID) 332 copy(buf[8:], iw.RecordOwnerID[:]) 333 copy(buf[40:], field) 334 copy(buf[40+len(field):], iw.Word) 335 return sha3.Sum256(buf) 336 } 337 338 /* 339 -- Syllab Encoder & Decoder -- 340 */ 341 342 func (iw *IndexWord) FromSyllab(payload []byte, stackIndex uint32) { 343 if uint32(len(buf)) < iw.LenOfSyllabStack() { 344 err = syllab.ErrShortArrayDecode 345 return 346 } 347 var i, add, ln uint32 // index, address and len of strings, slices, maps, ... 348 349 iw.Word = syllab.UnsafeGetString(buf, 88) 350 iw.RecordStructure = syllab.GetUInt64(buf, 96) 351 copy(iw.RecordPrimaryKey[:], buf[104:]) 352 copy(iw.RecordSecondaryKey[:], buf[136:]) 353 copy(iw.RecordOwnerID[:], buf[168:]) 354 355 add = syllab.GetUInt32(buf, 200) 356 ln = syllab.GetUInt32(buf, 204) 357 iw.Tokens = make([]WordToken, ln) 358 for i = 0; i < ln; i++ { 359 iw.Tokens[i].FromSyllab(buf, add) 360 add += uint32(iw.Tokens[i].LenAsSyllab()) 361 } 362 return 363 } 364 365 func (iw *IndexWord) ToSyllab(payload []byte, stackIndex, heapIndex uint32) (freeHeapIndex uint32) { 366 buf = make([]byte, iw.LenAsSyllab()) 367 var hi uint32 = iw.LenOfSyllabStack() // Heap index || Stack size! 368 369 hi = syllab.SetString(buf, iw.Word, 88, hi) 370 syllab.SetUInt64(buf, 96, iw.RecordStructure) 371 copy(buf[104:], iw.RecordPrimaryKey[:]) 372 copy(buf[136:], iw.RecordSecondaryKey[:]) 373 copy(buf[168:], iw.RecordOwnerID[:]) 374 375 syllab.SetUInt32(buf, 200, hi) 376 syllab.SetUInt32(buf, 204, uint32(len(iw.Tokens))) 377 for i := 0; i < len(iw.Tokens); i++ { 378 iw.Tokens[i].ToSyllab(buf, hi) 379 hi += uint32(iw.Tokens[i].LenAsSyllab()) 380 } 381 return 382 } 383 384 func (iw *IndexWord) LenOfSyllabStack() uint32 { 385 ln = 208 386 return 387 } 388 389 func (iw *IndexWord) LenOfSyllabHeap() (ln uint32) { 390 ln += uint32(len(iw.Word)) 391 ln += (uint32(len(iw.Tokens)) * iw.Tokens[0].LenOfSyllabStack()) 392 // ln += uint32(len(iw.Tokens)) * iw.Tokens[0].LenOfSyllabHeap() 393 return 394 } 395 396 func (iw *IndexWord) LenAsSyllab() uint64 { 397 return uint64(iw.LenOfSyllabStack() + iw.LenOfSyllabHeap()) 398 } 399 400 /* 401 -- Syllab Encoder & Decoder -- 402 */ 403 404 func (wt *WordToken) FromSyllab(buf []byte, stackIndex uint32) { 405 copy(wt.RecordID[:], buf[stackIndex:]) 406 wt.RecordFieldID = syllab.GetUInt8(buf, stackIndex+32) 407 wt.WordType = WordType(syllab.GetUInt16(buf, stackIndex+33)) 408 wt.WordOffsetInSentence = syllab.GetUInt64(buf, stackIndex+35) 409 wt.WordOffsetInText = syllab.GetUInt64(buf, stackIndex+43) 410 wt.OffsetInSentence = syllab.GetUInt64(buf, stackIndex+51) 411 wt.OffsetInText = syllab.GetUInt64(buf, stackIndex+59) 412 } 413 414 func (wt *WordToken) ToSyllab(buf []byte, stackIndex uint32) { 415 copy(buf[stackIndex:], wt.RecordID[:]) 416 syllab.SetUInt8(buf, stackIndex+32, wt.RecordFieldID) 417 syllab.SetUInt16(buf, stackIndex+33, uint16(wt.WordType)) 418 syllab.SetUInt64(buf, stackIndex+35, wt.WordOffsetInSentence) 419 syllab.SetUInt64(buf, stackIndex+43, wt.WordOffsetInText) 420 syllab.SetUInt64(buf, stackIndex+51, wt.OffsetInSentence) 421 syllab.SetUInt64(buf, stackIndex+59, wt.OffsetInText) 422 } 423 424 func (wt *WordToken) LenOfSyllabStack() uint32 { 425 return 67 426 } 427 428 func (wt *WordToken) LenOfSyllabHeap() (ln uint32) { 429 return 430 } 431 432 func (wt *WordToken) LenAsSyllab() uint64 { 433 return uint64(wt.LenOfSyllabStack() + wt.LenOfSyllabHeap()) 434 }