github.com/geniusesgroup/libgo@v0.0.0-20220713101832-828057a9d3d4/matn/phrase.go (about)

     1  /* For license and copyright information please see LEGAL file in repository */
     2  
     3  package matn
     4  
     5  import (
     6  	"golang.org/x/crypto/sha3"
     7  
     8  	"../ganjine"
     9  	"../json"
    10  	"../pehrest"
    11  	"../protocol"
    12  	"../syllab"
    13  	"../time/utc"
    14  )
    15  
    16  const indexPhraseStructureID uint64 = 736712670881955651 // ID of the IndexPhrase structure; assigned to records by Set and mixed into the index-key hash.
    17  
    18  var indexPhraseStructure = ds.DataStructure{ // Descriptor (URN, ID, status, localized name and description) for the IndexPhrase structure. NOTE(review): package "ds" is not in the visible import list — confirm where it comes from.
    19  	URN:             "urn:giti:matn.protocol:data-structure:index-phrase",
    20  	ID:              indexPhraseStructureID,
    21  	IssueDate:       1608786641, // presumably a Unix timestamp in seconds — TODO confirm
    22  	ExpiryDate:      0,
    23  	ExpireInFavorOf: "",
    24  	Status:          protocol.Software_PreAlpha,
    25  	Structure:       IndexPhrase{},
    26  
    27  	Name: map[protocol.LanguageID]string{
    28  		protocol.LanguageEnglish: "Index Phrase",
    29  	},
    30  	Description: map[protocol.LanguageID]string{
    31  		protocol.LanguageEnglish: "Store the phrase index",
    32  	},
    33  	TAGS: []string{
    34  		"", // NOTE(review): only an empty tag is set; looks like a placeholder.
    35  	},
    36  }
    37  
    38  // IndexPhrase is the standard structure to store any hash-byte index.
    39  // It is a simple secondary index, e.g. hash("user@email.com").
    40  type IndexPhrase struct {
    41  	RecordID   [32]byte // Set fills this with the SHA3-256 of the encoded record after its first 32 bytes.
    42  	Terms      []string `index-hash:"RecordID[pair,PageNumber]"` // Must keep a stable order to be retrievable: terms are concatenated in slice order when the index key is hashed.
    43  	PageNumber uint64 // Hashed together with Terms to build the index key.
    44  	Tokens     [10]PhraseToken // The order of the tokens is changed by an algorithm at fixed periods of time.
    45  }
    46  
    47  // PhraseToken stores details about a word in the record to index.
    48  type PhraseToken struct {
    49  	RecordID          [32]byte `json:",string"`
    50  	RecordStructureID uint64
    51  	RecordFieldID     uint8
    52  	RecordPrimaryKey  [32]byte `json:",string"` // Stores any primary ID or any other data up to 32 bytes long.
    53  	// A snippet text is not needed because this is not a web search engine.
    54  }
    55  
    56  // SaveNew method set some data and write entire IndexPhrase record with all indexes!
    57  func (ip *IndexPhrase) SaveNew() (err protocol.Error) {
    58  	err = ip.Set()
    59  	if err != nil {
    60  		return
    61  	}
    62  	ip.IndexRecordIDForTermsPageNumber()
    63  	return
    64  }
    65  
    66  // Set fills the record header fields, encodes the record, derives RecordID from the encoded bytes and sends the record with gsdk.SetRecord.
    67  func (ip *IndexPhrase) Set() (err protocol.Error) {
    68  	ip.RecordStructureID = indexPhraseStructureID // NOTE(review): RecordStructureID, RecordSize, WriteTime and OwnerAppID are not fields of the IndexPhrase struct declared in this file — confirm.
    69  	ip.RecordSize = ip.LenAsSyllab()
    70  	ip.WriteTime = utc.Now()
    71  	ip.OwnerAppID = protocol.OS.AppManifest().AppUUID()
    72  
    73  	var req = ganjine.SetRecordReq{
    74  		Type:   ganjine.RequestTypeBroadcast,
    75  		Record: ip.ToSyllab(), // NOTE(review): ToSyllab in this file takes (payload, stackIndex, heapIndex) and returns a heap index, not a byte slice — confirm.
    76  	}
    77  	ip.RecordID = sha3.Sum256(req.Record[32:]) // RecordID is the SHA3-256 of everything after the first 32 bytes of the encoded record.
    78  	copy(req.Record[0:], ip.RecordID[:]) // Write the computed ID into the first 32 bytes of the record.
    79  
    80  	err = gsdk.SetRecord(&req)
    81  	return
    82  }
    83  
    84  // GetByRecordID method read all existing record data by given RecordID!
    85  func (ip *IndexPhrase) GetByRecordID() (err protocol.Error) {
    86  	var req = ganjine.GetRecordReq{
    87  		RecordID:          ip.RecordID,
    88  		RecordStructureID: indexPhraseStructureID,
    89  	}
    90  	var res *ganjine.GetRecordRes
    91  	res, err = gsdk.GetRecord(&req)
    92  	if err != nil {
    93  		return
    94  	}
    95  
    96  	err = ip.FromSyllab(res.Record)
    97  	if err != nil {
    98  		return
    99  	}
   100  
   101  	if ip.RecordStructureID != indexPhraseStructureID {
   102  		err = ganjine.ErrMisMatchedStructureID
   103  	}
   104  	return
   105  }
   106  
   107  /*
   108  	-- Search Methods --
   109  */
   110  
   111  // FindRecordsIDByTermsPageNumber find RecordsID by given Terms+PageNumber
   112  func (ip *IndexPhrase) FindRecordsIDByTermsPageNumber(offset, limit uint64) (RecordsID [][32]byte, err protocol.Error) {
   113  	var indexReq = &pehrest.HashGetValuesReq{
   114  		IndexKey: ip.hashTermsPageNumberForRecordID(),
   115  		Offset:   offset,
   116  		Limit:    limit,
   117  	}
   118  	var indexRes *pehrest.HashGetValuesRes
   119  	indexRes, err = psdk.HashGetValues(indexReq)
   120  	RecordsID = indexRes.IndexValues
   121  	return
   122  }
   123  
   124  /*
   125  	-- PRIMARY INDEXES --
   126  */
   127  
   128  // IndexRecordIDForTermsPageNumber save RecordID chain for Terms+PageNumber
   129  // Call in each update to the exiting record!
   130  func (ip *IndexPhrase) IndexRecordIDForTermsPageNumber() {
   131  	var indexRequest = pehrest.HashSetValueReq{
   132  		Type:       ganjine.RequestTypeBroadcast,
   133  		IndexKey:   ip.hashTermsPageNumberForRecordID(),
   134  		IndexValue: ip.RecordID,
   135  	}
   136  	var err = psdk.HashSetValue(&indexRequest)
   137  	if err != nil {
   138  		// TODO::: we must retry more due to record wrote successfully!
   139  	}
   140  }
   141  
   142  func (ip *IndexPhrase) hashTermsPageNumberForRecordID() (hash [32]byte) {
   143  	const field = "TermsPageNumber"
   144  	var bufLen = 16 + len(field)
   145  	for _, t := range ip.Terms {
   146  		bufLen += len(t)
   147  	}
   148  	var buf = make([]byte, bufLen)
   149  	syllab.SetUInt64(buf, 0, indexPhraseStructureID)
   150  	syllab.SetUInt64(buf, 8, ip.PageNumber)
   151  	copy(buf[16:], field)
   152  	bufLen = 16 + len(field)
   153  	for _, t := range ip.Terms {
   154  		copy(buf[bufLen:], t)
   155  		bufLen += len(t)
   156  	}
   157  	return sha3.Sum256(buf[:])
   158  }
   159  
   160  /*
   161  	-- Syllab Encoder & Decoder --
   162  */
   163  
   164  func (ip *IndexPhrase) FromSyllab(payload []byte, stackIndex uint32) {
   165  	if uint32(len(payload)) < ip.LenOfSyllabStack() {
   166  		err = syllab.ErrShortArrayDecode
   167  		return
   168  	}
   169  
   170  	ip.Terms = syllab.UnsafeGetStringArray(payload, 88)
   171  	ip.PageNumber = syllab.GetUInt64(payload, 96)
   172  
   173  	var si uint32 = 104
   174  	for i := 0; i < 10; i++ {
   175  		ip.Tokens[i].FromSyllab(payload, si)
   176  		si += uint32(ip.Tokens[i].LenAsSyllab())
   177  	}
   178  	return
   179  }
   180  
   181  func (ip *IndexPhrase) ToSyllab(payload []byte, stackIndex, heapIndex uint32) (freeHeapIndex uint32) {
   182  	heapIndex = syllab.SetStringArray(payload, ip.Terms, 88, heapIndex)
   183  	syllab.SetUInt64(payload, 96, ip.PageNumber)
   184  
   185  	var si uint32 = 104
   186  	for i := 0; i < 10; i++ {
   187  		ip.Tokens[i].ToSyllab(payload, si)
   188  		si += uint32(ip.Tokens[i].LenAsSyllab())
   189  	}
   190  	return heapIndex
   191  }
   192  
   193  func (ip *IndexPhrase) LenOfSyllabStack() (ln uint32) {
   194  	ln = 104
   195  	ln += uint32(len(ip.Tokens)) * ip.Tokens[0].LenOfSyllabStack()
   196  	return
   197  }
   198  
   199  func (ip *IndexPhrase) LenOfSyllabHeap() (ln uint32) {
   200  	for i := 0; i < len(ip.Terms); i++ {
   201  		ln += uint32(len(ip.Terms[i]))
   202  	}
   203  	// ln += uint32(len(ip.Tokens)) * ip.Tokens[0].LenOfSyllabHeap
   204  	return
   205  }
   206  
   207  func (ip *IndexPhrase) LenAsSyllab() uint64 {
   208  	return uint64(ip.LenOfSyllabStack() + ip.LenOfSyllabHeap())
   209  }
   210  
   211  /*
   212  	-- PhraseToken Encoder & Decoder --
   213  */
   214  
   215  func (pt *PhraseToken) FromSyllab(buf []byte, stackIndex uint32) {
   216  	copy(pt.RecordID[:], buf[stackIndex:])
   217  	pt.RecordStructureID = syllab.GetUInt64(buf, stackIndex+32)
   218  	pt.RecordFieldID = syllab.GetUInt8(buf, stackIndex+40)
   219  	copy(pt.RecordPrimaryKey[:], buf[stackIndex+41:])
   220  }
   221  
   222  func (pt *PhraseToken) ToSyllab(buf []byte, stackIndex uint32) {
   223  	copy(buf[0:], pt.RecordID[stackIndex:])
   224  	syllab.SetUInt64(buf, stackIndex+32, pt.RecordStructureID)
   225  	syllab.SetUInt8(buf, stackIndex+40, pt.RecordFieldID)
   226  	copy(buf[stackIndex+41:], pt.RecordPrimaryKey[:])
   227  }
   228  
   229  func (pt *PhraseToken) LenOfSyllabStack() uint32 {
   230  	return 73
   231  }
   232  
   233  func (pt *PhraseToken) LenOfSyllabHeap() (ln uint32) {
   234  	return
   235  }
   236  
   237  func (pt *PhraseToken) LenAsSyllab() uint64 {
   238  	return uint64(pt.LenOfSyllabStack() + pt.LenOfSyllabHea)
   239  }
   240  
   241  func (pt *PhraseToken) FromJSON(decoder *json.DecoderUnsafeMinified) (err protocol.Error) {
   242  	for err == nil {
   243  		var keyName = decoder.DecodeKey()
   244  		switch keyName {
   245  		case "RecordID":
   246  			err = decoder.DecodeByteArrayAsBase64(pt.RecordID[:])
   247  		case "RecordStructureID":
   248  			pt.RecordStructureID, err = decoder.DecodeUInt64()
   249  		case "RecordFieldID":
   250  			pt.RecordFieldID, err = decoder.DecodeUInt8()
   251  		case "RecordPrimaryKey":
   252  			err = decoder.DecodeByteArrayAsBase64(pt.RecordPrimaryKey[:])
   253  		default:
   254  			err = decoder.NotFoundKeyStrict()
   255  		}
   256  
   257  		if decoder.End() {
   258  			return
   259  		}
   260  	}
   261  	return
   262  }
   263  
   264  func (pt *PhraseToken) ToJSON(payload []byte) []byte {
   265  	var encoder = json.Encoder{Buf: payload}
   266  
   267  	encoder.EncodeString(`{"RecordID":"`)
   268  	encoder.EncodeByteSliceAsBase64(pt.RecordID[:])
   269  
   270  	encoder.EncodeString(`","RecordStructureID":`)
   271  	encoder.EncodeUInt64(pt.RecordStructureID)
   272  
   273  	encoder.EncodeString(`,"RecordFieldID":`)
   274  	encoder.EncodeUInt8(pt.RecordFieldID)
   275  
   276  	encoder.EncodeString(`,"RecordPrimaryKey":"`)
   277  	encoder.EncodeByteSliceAsBase64(pt.RecordPrimaryKey[:])
   278  
   279  	encoder.EncodeString(`"}`)
   280  	return encoder.Buf
   281  }
   282  
   283  func (pt *PhraseToken) LenAsJSON() (ln int) {
   284  	ln = 184
   285  	return
   286  }