github.com/GeniusesGroup/libgo@v0.0.0-20220929090155-5ff932cb408e/matn/word.go (about)

     1  /* For license and copyright information please see LEGAL file in repository */
     2  
     3  package matn
     4  
     5  import (
     6  	"golang.org/x/crypto/sha3"
     7  
     8  	"../ganjine"
     9  	"../object"
    10  	"../pehrest"
    11  	"../protocol"
    12  	"../syllab"
    13  	"../time"
    14  )
    15  
// indexWordStructureID is the structure ID of IndexWord records.
// It prefixes the pre-image of every hash built in this file and is the value
// GetByRecordID compares a decoded record's RecordStructureID against.
const indexWordStructureID uint64 = 17895727148230071652
    17  
// indexWordStructure is the data-structure descriptor for IndexWord.
// NOTE(review): the `ds` package is not among the imports visible in this file — confirm where it comes from.
var indexWordStructure = ds.DataStructure{
	URN:             "urn:giti:matn.protocol:data-structure:index-word",
	ID:              17895727148230071652, // same literal as indexWordStructureID; keep the two in sync
	IssueDate:       1608786632,
	ExpiryDate:      0,
	ExpireInFavorOf: "",
	Status:          protocol.Software_PreAlpha,
	Structure:       IndexWord{},

	Name: map[protocol.LanguageID]string{
		protocol.LanguageEnglish: "Index Word",
	},
	Description: map[protocol.LanguageID]string{
		protocol.LanguageEnglish: "store the word index data",
	},
	// Currently holds a single empty tag.
	TAGS: []string{
		"",
	},
}
    37  
// IndexWord is the standard structure to store the index data of one word for one record.
//
// NOTE(review): methods in this file (Set, GetByRecordID, the Index* methods) read and write
// iw.RecordID, iw.RecordStructureID, iw.RecordSize, iw.WriteTime and iw.OwnerAppID, none of which
// is declared here — presumably a record header that is meant to be part of this type; confirm.
// NOTE(review): the key fields below are 16 bytes wide, while the hash helpers and the Syllab
// encoder/decoder in this file reserve 32-byte slots for them (offsets 104/136/168, "8+32").
type IndexWord struct {
	// Word is the indexed word. The tag lists the hash indexes kept for it.
	Word               string `index-hash:"RecordID,RecordID[pair,RecordStructure],RecordID[pair,RecordSecondaryKey],RecordID[pair,RecordOwnerID]"` // Order of the indexed RecordIDs is changed by an algorithm at exact periods of time!
	RecordStructure    uint64
	RecordPrimaryKey   [16]byte // Store any primary ID or any data up to 16 byte length e.g. ID
	RecordSecondaryKey [16]byte // Store any secondary ID or any data up to 16 byte length e.g. GroupID
	RecordOwnerID      [16]byte
	// Tokens is a slice because more than one location in the object may contain this word.
	Tokens []WordToken
}
    48  
// WordToken stores the details of one occurrence of a word in the record being indexed.
type WordToken struct {
	// NOTE(review): encoding/json documents the ",string" option only for string, floating point,
	// integer and boolean fields — confirm it has the intended effect on a byte array.
	RecordID             [16]byte `json:",string"`
	RecordFieldID        uint8
	WordType             WordType
	WordOffsetInSentence uint64 // Position of the word in the sentence
	WordOffsetInText     uint64 // Position of the word in the text
	OffsetInSentence     uint64 // Position of the word's first character in the sentence
	OffsetInText         uint64 // Position of the word's first character in the text
}
    59  
    60  // SaveNew method set some data and write entire IndexWord record with all indexes!
    61  func (iw *IndexWord) SaveNew() (err protocol.Error) {
    62  	err = iw.Set()
    63  	if err != nil {
    64  		return
    65  	}
    66  	iw.IndexRecordIDForWord()
    67  	iw.IndexRecordIDForWordRecordStructure()
    68  	if iw.RecordSecondaryKey != [32]byte{} {
    69  		iw.IndexRecordIDForWordRecordSecondaryKey()
    70  	}
    71  	iw.IndexRecordIDForWordRecordOwnerID()
    72  	return
    73  }
    74  
    75  // SaveOrUpdate method set some data and write entire IndexWord record with all indexes or update exiting one!
    76  func (iw *IndexWord) SaveOrUpdate() (err protocol.Error) {
    77  	var check = IndexWord{
    78  		Word:             iw.Word,
    79  		RecordPrimaryKey: iw.RecordPrimaryKey,
    80  	}
    81  	err = check.GetByWordRecordPrimaryKey()
    82  	if err.Equal(object.ErrNotExist) {
    83  		err = iw.SaveNew()
    84  	} else if err != nil {
    85  		// TODO::: handle error
    86  	} else {
    87  		iw.Tokens = append(iw.Tokens, make([]WordToken, 0, len(iw.Tokens)+len(check.Tokens))...)
    88  		for _, token := range check.Tokens {
    89  			// TODO::: need to check first by RecordID??
    90  			iw.Tokens = append(iw.Tokens, token)
    91  		}
    92  		err = iw.Set()
    93  	}
    94  	return
    95  }
    96  
    97  // Set method set some data and write entire IndexWord record!
    98  func (iw *IndexWord) Set() (err protocol.Error) {
    99  	iw.RecordID = iw.hashWordRecordPrimaryKeyForRecordID()
   100  	iw.RecordStructureID = indexWordStructureID
   101  	iw.RecordSize = iw.LenAsSyllab()
   102  	iw.WriteTime = time.Now()
   103  	iw.OwnerAppID = protocol.OS.AppManifest().AppUUID()
   104  
   105  	var req = ganjine.SetRecordReq{
   106  		Type:   ganjine.RequestTypeBroadcast,
   107  		Record: iw.ToSyllab(),
   108  	}
   109  	err = gsdk.SetRecord(&req)
   110  	return
   111  }
   112  
   113  func (iw *IndexWord) hashWordRecordPrimaryKeyForRecordID() (hash [32]byte) {
   114  	const field = "WordRecordPrimaryKey"
   115  	var buf = make([]byte, 40+len(field)+len(iw.Word)) // 8+32
   116  	syllab.SetUInt64(buf, 0, indexWordStructureID)
   117  	copy(buf[8:], iw.RecordPrimaryKey[:])
   118  	copy(buf[40:], field)
   119  	copy(buf[40+len(field):], iw.Word)
   120  	return sha3.Sum256(buf)
   121  }
   122  
   123  // GetByRecordID method read all existing record data by given RecordID!
   124  func (iw *IndexWord) GetByRecordID() (err protocol.Error) {
   125  	var req = ganjine.GetRecordReq{
   126  		RecordID:          iw.RecordID,
   127  		RecordStructureID: indexWordStructureID,
   128  	}
   129  	var res *ganjine.GetRecordRes
   130  	res, err = gsdk.GetRecord(&req)
   131  	if err != nil {
   132  		return
   133  	}
   134  
   135  	err = iw.FromSyllab(res.Record)
   136  	if err != nil {
   137  		return
   138  	}
   139  
   140  	if iw.RecordStructureID != indexWordStructureID {
   141  		err = ganjine.ErrMisMatchedStructureID
   142  	}
   143  	return
   144  }
   145  
   146  // GetByWordRecordPrimaryKey find RecordsID by given Word+RecordPrimaryKey
   147  func (iw *IndexWord) GetByWordRecordPrimaryKey() (err protocol.Error) {
   148  	iw.RecordID = iw.hashWordRecordPrimaryKeyForRecordID()
   149  	err = iw.GetByRecordID()
   150  	return
   151  }
   152  
   153  /*
   154  	-- Search Methods --
   155  */
   156  
   157  // FindRecordsIDByWord find RecordsID by given ID
   158  func (iw *IndexWord) FindRecordsIDByWord(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) {
   159  	var indexReq = &pehrest.HashGetValuesReq{
   160  		IndexKey: iw.hashWordforRecordID(),
   161  		Offset:   offset,
   162  		Limit:    limit,
   163  	}
   164  	var indexRes *pehrest.HashGetValuesRes
   165  	indexRes, err = psdk.HashGetValues(indexReq)
   166  	RecordsID = indexRes.IndexValues
   167  	return
   168  }
   169  
   170  // FindRecordsIDByWordRecordStructure find RecordsID by given Word+RecordStructure
   171  func (iw *IndexWord) FindRecordsIDByWordRecordStructure(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) {
   172  	var indexReq = &pehrest.HashGetValuesReq{
   173  		IndexKey: iw.hashWordRecordStructureForRecordID(),
   174  		Offset:   offset,
   175  		Limit:    limit,
   176  	}
   177  	var indexRes *pehrest.HashGetValuesRes
   178  	indexRes, err = psdk.HashGetValues(indexReq)
   179  	RecordsID = indexRes.IndexValues
   180  	return
   181  }
   182  
   183  // FindRecordsIDByWordSecondaryKey find RecordsID by given Word+SecondaryKey
   184  func (iw *IndexWord) FindRecordsIDByWordSecondaryKey(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) {
   185  	var indexReq = &pehrest.HashGetValuesReq{
   186  		IndexKey: iw.hashWordRecordSecondaryKeyForRecordID(),
   187  		Offset:   offset,
   188  		Limit:    limit,
   189  	}
   190  	var indexRes *pehrest.HashGetValuesRes
   191  	indexRes, err = psdk.HashGetValues(indexReq)
   192  	RecordsID = indexRes.IndexValues
   193  	return
   194  }
   195  
   196  // FindRecordsIDByWordRecordOwnerID find RecordsID by given Word+RecordOwnerID
   197  func (iw *IndexWord) FindRecordsIDByWordRecordOwnerID(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) {
   198  	var indexReq = &pehrest.HashGetValuesReq{
   199  		IndexKey: iw.hashWordRecordOwnerIDForRecordID(),
   200  		Offset:   offset,
   201  		Limit:    limit,
   202  	}
   203  	var indexRes *pehrest.HashGetValuesRes
   204  	indexRes, err = psdk.HashGetValues(indexReq)
   205  	RecordsID = indexRes.IndexValues
   206  	return
   207  }
   208  
   209  // FindByWordRecordStructure find  by given Word+RecordStructure
   210  func (iw *IndexWord) FindByWordRecordStructure(offset, limit uint64) (phraseTokens []PhraseToken, err protocol.Error) {
   211  	var indexReq = &pehrest.HashGetValuesReq{
   212  		IndexKey: iw.hashWordRecordStructureForRecordID(),
   213  		Offset:   offset,
   214  		Limit:    limit,
   215  	}
   216  	var indexRes *pehrest.HashGetValuesRes
   217  	indexRes, err = psdk.HashGetValues(indexReq)
   218  	var RecordsID = indexRes.IndexValues
   219  
   220  	phraseTokens = make([]PhraseToken, len(RecordsID))
   221  	for i := 0; i < len(RecordsID); i++ {
   222  		iw.RecordID = RecordsID[i]
   223  		iw.GetByRecordID()
   224  
   225  		phraseTokens[i] = PhraseToken{
   226  			RecordID:          iw.Tokens[len(iw.Tokens)-1].RecordID,
   227  			RecordStructureID: iw.RecordStructure,
   228  			RecordFieldID:     iw.Tokens[len(iw.Tokens)-1].RecordFieldID,
   229  			RecordPrimaryKey:  iw.RecordPrimaryKey,
   230  		}
   231  	}
   232  	return
   233  }
   234  
   235  /*
   236  	-- PRIMARY INDEXES --
   237  */
   238  
   239  // IndexRecordIDForWord save RecordID chain for ID+Language
   240  // Call in each update to the exiting record!
   241  func (iw *IndexWord) IndexRecordIDForWord() {
   242  	var indexRequest = pehrest.HashSetValueReq{
   243  		Type:       ganjine.RequestTypeBroadcast,
   244  		IndexKey:   iw.hashWordforRecordID(),
   245  		IndexValue: iw.RecordID,
   246  	}
   247  	var err = psdk.HashSetValue(&indexRequest)
   248  	if err != nil {
   249  		// TODO::: we must retry more due to record wrote successfully!
   250  	}
   251  }
   252  
   253  func (iw *IndexWord) hashWordforRecordID() (hash [32]byte) {
   254  	const field = "Word"
   255  	var buf = make([]byte, 8+len(field)+len(iw.Word))
   256  	syllab.SetUInt64(buf, 0, indexWordStructureID)
   257  	copy(buf[8:], field)
   258  	copy(buf[8+len(field):], iw.Word)
   259  	return sha3.Sum256(buf[:])
   260  }
   261  
   262  /*
   263  	-- SECONDARY INDEXES --
   264  */
   265  
   266  // IndexRecordIDForWordRecordStructure save RecordID chain for Word+RecordStructure
   267  // Don't call in update to an exiting record!
   268  func (iw *IndexWord) IndexRecordIDForWordRecordStructure() {
   269  	var indexRequest = pehrest.HashSetValueReq{
   270  		Type:       ganjine.RequestTypeBroadcast,
   271  		IndexKey:   iw.hashWordRecordStructureForRecordID(),
   272  		IndexValue: iw.RecordID,
   273  	}
   274  	var err = psdk.HashSetValue(&indexRequest)
   275  	if err != nil {
   276  		// TODO::: we must retry more due to record wrote successfully!
   277  	}
   278  }
   279  
   280  func (iw *IndexWord) hashWordRecordStructureForRecordID() (hash [32]byte) {
   281  	const field = "WordRecordStructure"
   282  	var buf = make([]byte, 16+len(field)+len(iw.Word)) // 8+8
   283  	syllab.SetUInt64(buf, 0, indexWordStructureID)
   284  	syllab.SetUInt64(buf, 8, iw.RecordStructure)
   285  	copy(buf[16:], field)
   286  	copy(buf[16+len(field):], iw.Word)
   287  	return sha3.Sum256(buf)
   288  }
   289  
   290  // IndexRecordIDForWordRecordSecondaryKey save RecordID chain for Word+RecordSecondaryKey
   291  // Don't call in update to an exiting record!
   292  func (iw *IndexWord) IndexRecordIDForWordRecordSecondaryKey() {
   293  	var indexRequest = pehrest.HashSetValueReq{
   294  		Type:       ganjine.RequestTypeBroadcast,
   295  		IndexKey:   iw.hashWordRecordSecondaryKeyForRecordID(),
   296  		IndexValue: iw.RecordID,
   297  	}
   298  	var err = psdk.HashSetValue(&indexRequest)
   299  	if err != nil {
   300  		// TODO::: we must retry more due to record wrote successfully!
   301  	}
   302  }
   303  
   304  func (iw *IndexWord) hashWordRecordSecondaryKeyForRecordID() (hash [32]byte) {
   305  	const field = "WordRecordSecondaryKey"
   306  	var buf = make([]byte, 40+len(field)+len(iw.Word)) // 8+32
   307  	syllab.SetUInt64(buf, 0, indexWordStructureID)
   308  	copy(buf[8:], iw.RecordSecondaryKey[:])
   309  	copy(buf[40:], field)
   310  	copy(buf[40+len(field):], iw.Word)
   311  	return sha3.Sum256(buf)
   312  }
   313  
   314  // IndexRecordIDForWordRecordOwnerID save RecordID chain for Word+RecordOwnerID
   315  // Don't call in update to an exiting record!
   316  func (iw *IndexWord) IndexRecordIDForWordRecordOwnerID() {
   317  	var indexRequest = pehrest.HashSetValueReq{
   318  		Type:       ganjine.RequestTypeBroadcast,
   319  		IndexKey:   iw.hashWordRecordOwnerIDForRecordID(),
   320  		IndexValue: iw.RecordID,
   321  	}
   322  	var err = psdk.HashSetValue(&indexRequest)
   323  	if err != nil {
   324  		// TODO::: we must retry more due to record wrote successfully!
   325  	}
   326  }
   327  
   328  func (iw *IndexWord) hashWordRecordOwnerIDForRecordID() (hash [32]byte) {
   329  	const field = "WordRecordOwnerID"
   330  	var buf = make([]byte, 40+len(field)+len(iw.Word)) // 8+32
   331  	syllab.SetUInt64(buf, 0, indexWordStructureID)
   332  	copy(buf[8:], iw.RecordOwnerID[:])
   333  	copy(buf[40:], field)
   334  	copy(buf[40+len(field):], iw.Word)
   335  	return sha3.Sum256(buf)
   336  }
   337  
   338  /*
   339  	-- Syllab Encoder & Decoder --
   340  */
   341  
// FromSyllab decodes the IndexWord fields from a Syllab encoded record.
//
// NOTE(review): signature and body disagree. The body reads `buf` and assigns `err`, neither of
// which is declared here, while the `payload` and `stackIndex` parameters are never used.
// GetByRecordID calls this method with a single argument and expects an error result.
// Decide which signature is intended before relying on this method.
func (iw *IndexWord) FromSyllab(payload []byte, stackIndex uint32) {
	// The buffer must hold at least the fixed-size (stack) part of the record.
	if uint32(len(buf)) < iw.LenOfSyllabStack() {
		err = syllab.ErrShortArrayDecode
		return
	}
	var i, add, ln uint32 // index, address and len of strings, slices, maps, ...

	// Fields are read from fixed offsets; nothing below offset 88 is read in this function
	// (presumably a record header — confirm).
	iw.Word = syllab.UnsafeGetString(buf, 88)
	iw.RecordStructure = syllab.GetUInt64(buf, 96)
	// Key slots are spaced 32 bytes apart, but each copy fills only the 16-byte destination field.
	copy(iw.RecordPrimaryKey[:], buf[104:])
	copy(iw.RecordSecondaryKey[:], buf[136:])
	copy(iw.RecordOwnerID[:], buf[168:])

	// Tokens: address of the first token at offset 200, token count at offset 204.
	add = syllab.GetUInt32(buf, 200)
	ln = syllab.GetUInt32(buf, 204)
	iw.Tokens = make([]WordToken, ln)
	for i = 0; i < ln; i++ {
		// Tokens are laid out back to back, each occupying LenAsSyllab() bytes.
		iw.Tokens[i].FromSyllab(buf, add)
		add += uint32(iw.Tokens[i].LenAsSyllab())
	}
	return
}
   364  
// ToSyllab encodes the IndexWord fields as a Syllab record.
//
// NOTE(review): signature and body disagree. The body assigns `buf`, which is not declared here,
// never uses `payload`, `stackIndex` or `heapIndex`, and never sets `freeHeapIndex`.
// Set calls this method without arguments and uses the result as the record bytes.
// Decide which signature is intended before relying on this method.
func (iw *IndexWord) ToSyllab(payload []byte, stackIndex, heapIndex uint32) (freeHeapIndex uint32) {
	buf = make([]byte, iw.LenAsSyllab())
	var hi uint32 = iw.LenOfSyllabStack() // Heap index: starts right after the fixed-size (stack) part

	// Fields are written at fixed offsets; nothing below offset 88 is written in this function
	// (presumably a record header — confirm).
	hi = syllab.SetString(buf, iw.Word, 88, hi)
	syllab.SetUInt64(buf, 96, iw.RecordStructure)
	// Key slots are spaced 32 bytes apart, but each source field is only 16 bytes.
	copy(buf[104:], iw.RecordPrimaryKey[:])
	copy(buf[136:], iw.RecordSecondaryKey[:])
	copy(buf[168:], iw.RecordOwnerID[:])

	// Tokens: address of the first token at offset 200, token count at offset 204.
	syllab.SetUInt32(buf, 200, hi)
	syllab.SetUInt32(buf, 204, uint32(len(iw.Tokens)))
	for i := 0; i < len(iw.Tokens); i++ {
		// Tokens are written back to back, each occupying LenAsSyllab() bytes.
		iw.Tokens[i].ToSyllab(buf, hi)
		hi += uint32(iw.Tokens[i].LenAsSyllab())
	}
	return
}
   383  
   384  func (iw *IndexWord) LenOfSyllabStack() uint32 {
   385  	ln = 208
   386  	return
   387  }
   388  
   389  func (iw *IndexWord) LenOfSyllabHeap() (ln uint32) {
   390  	ln += uint32(len(iw.Word))
   391  	ln += (uint32(len(iw.Tokens)) * iw.Tokens[0].LenOfSyllabStack())
   392  	// ln += uint32(len(iw.Tokens)) * iw.Tokens[0].LenOfSyllabHeap()
   393  	return
   394  }
   395  
   396  func (iw *IndexWord) LenAsSyllab() uint64 {
   397  	return uint64(iw.LenOfSyllabStack() + iw.LenOfSyllabHeap())
   398  }
   399  
/*
	-- WordToken Syllab Encoder & Decoder --
*/
   403  
   404  func (wt *WordToken) FromSyllab(buf []byte, stackIndex uint32) {
   405  	copy(wt.RecordID[:], buf[stackIndex:])
   406  	wt.RecordFieldID = syllab.GetUInt8(buf, stackIndex+32)
   407  	wt.WordType = WordType(syllab.GetUInt16(buf, stackIndex+33))
   408  	wt.WordOffsetInSentence = syllab.GetUInt64(buf, stackIndex+35)
   409  	wt.WordOffsetInText = syllab.GetUInt64(buf, stackIndex+43)
   410  	wt.OffsetInSentence = syllab.GetUInt64(buf, stackIndex+51)
   411  	wt.OffsetInText = syllab.GetUInt64(buf, stackIndex+59)
   412  }
   413  
   414  func (wt *WordToken) ToSyllab(buf []byte, stackIndex uint32) {
   415  	copy(buf[stackIndex:], wt.RecordID[:])
   416  	syllab.SetUInt8(buf, stackIndex+32, wt.RecordFieldID)
   417  	syllab.SetUInt16(buf, stackIndex+33, uint16(wt.WordType))
   418  	syllab.SetUInt64(buf, stackIndex+35, wt.WordOffsetInSentence)
   419  	syllab.SetUInt64(buf, stackIndex+43, wt.WordOffsetInText)
   420  	syllab.SetUInt64(buf, stackIndex+51, wt.OffsetInSentence)
   421  	syllab.SetUInt64(buf, stackIndex+59, wt.OffsetInText)
   422  }
   423  
   424  func (wt *WordToken) LenOfSyllabStack() uint32 {
   425  	return 67
   426  }
   427  
   428  func (wt *WordToken) LenOfSyllabHeap() (ln uint32) {
   429  	return
   430  }
   431  
   432  func (wt *WordToken) LenAsSyllab() uint64 {
   433  	return uint64(wt.LenOfSyllabStack() + wt.LenOfSyllabHeap())
   434  }