github.com/mondo192/jfrog-client-go@v1.0.0/utils/io/content/contentreader.go

package content

import (
	"bufio"
	"encoding/json"
	"errors"
	"github.com/mondo192/jfrog-client-go/utils"
	"github.com/mondo192/jfrog-client-go/utils/errorutils"
	"github.com/mondo192/jfrog-client-go/utils/log"
	"io"
	"os"
	"reflect"
	"sort"
	"sync"
)

// Open and read JSON files, find the array key inside them and load its value into memory in small chunks.
// Currently, 'ContentReader' only supports extracting a single value for a given key (arrayKey); other keys are ignored.
// The value must be of type array.
// Each array value can be fetched using 'NextRecord' (thread-safe).
// This technique works around memory limits that may be too small to fit a large JSON document.
type ContentReader struct {
	// filesPaths - source data file paths.
	filesPaths []string
	// arrayKey - Read the value of the specific object in JSON.
	arrayKey string
	// The objects from the source data file are being pushed into the data channel.
	dataChannel chan map[string]interface{}
	errorsQueue *utils.ErrorsQueue
	once        *sync.Once
	// Number of elements in the array (cache)
	length int
	empty  bool
}

func NewContentReader(filePath string, arrayKey string) *ContentReader {
	self := NewMultiSourceContentReader([]string{filePath}, arrayKey)
	self.empty = filePath == ""
	return self
}

func NewMultiSourceContentReader(filePaths []string, arrayKey string) *ContentReader {
	self := ContentReader{}
	self.filesPaths = filePaths
	self.arrayKey = arrayKey
	self.dataChannel = make(chan map[string]interface{}, utils.MaxBufferSize)
	self.errorsQueue = utils.NewErrorsQueue(utils.MaxBufferSize)
	self.once = new(sync.Once)
	self.empty = len(filePaths) == 0
	return &self
}

func NewEmptyContentReader(arrayKey string) *ContentReader {
	self := NewContentReader("", arrayKey)
	return self
}

func (cr *ContentReader) IsEmpty() bool {
	return cr.empty
}

// Each call to 'NextRecord()' will return a single element from the channel.
// Only the first call invokes a goroutine to read data from the file and push it into the channel.
// 'io.EOF' will be returned if no data is left.
func (cr *ContentReader) NextRecord(recordOutput interface{}) error {
	if cr.empty {
		return errorutils.CheckErrorf("Empty")
	}
	cr.once.Do(func() {
		go func() {
			defer close(cr.dataChannel)
			cr.length = 0
			cr.run()
		}()
	})
	record, ok := <-cr.dataChannel
	if !ok {
		return io.EOF
	}
	// Transform the data into a Go type
	err := ConvertToStruct(record, &recordOutput)
	if err != nil {
		cr.errorsQueue.AddError(err)
		return err
	}
	cr.length++
	return err
}

// Prepare the reader to read the file all over again (not thread-safe).
func (cr *ContentReader) Reset() {
	cr.dataChannel = make(chan map[string]interface{}, utils.MaxBufferSize)
	cr.once = new(sync.Once)
}

// Cleanup the reader data.
func (cr *ContentReader) Close() error {
	for _, filePath := range cr.filesPaths {
		if filePath == "" {
			continue
		}
		if err := errorutils.CheckError(os.Remove(filePath)); err != nil {
			return errors.New("Failed to close reader: " + err.Error())
		}
	}
	cr.filesPaths = nil
	return nil
}

func (cr *ContentReader) GetFilesPaths() []string {
	return cr.filesPaths
}
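// exampleDrainReader is a usage sketch and is not part of the original file. It shows the
// typical consumption pattern for a ContentReader: call NextRecord until io.EOF, then check
// GetError for failures collected by the background goroutine, and Close to delete the
// temporary source files. The record type here is a plain map; callers usually pass a struct.
func exampleDrainReader(reader *ContentReader) (int, error) {
	defer func() {
		if closeErr := reader.Close(); closeErr != nil {
			log.Error(closeErr.Error())
		}
	}()
	count := 0
	for record := new(map[string]interface{}); ; record = new(map[string]interface{}) {
		if err := reader.NextRecord(record); err != nil {
			if err == io.EOF {
				break
			}
			return 0, err
		}
		count++
	}
	// Errors from reading the source files are reported through the errors queue, not NextRecord.
	return count, reader.GetError()
}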
// Number of elements in the array.
func (cr *ContentReader) Length() (int, error) {
	if cr.empty {
		return 0, nil
	}
	if cr.length == 0 {
		for item := new(interface{}); cr.NextRecord(item) == nil; item = new(interface{}) {
		}
		cr.Reset()
		if err := cr.GetError(); err != nil {
			return 0, err
		}
	}
	return cr.length, nil
}

// Open and read the files one by one. Push each array element into the channel.
// The channel may block the thread, therefore this should run asynchronously.
func (cr *ContentReader) run() {
	for _, filePath := range cr.filesPaths {
		cr.readSingleFile(filePath)
	}
}

func (cr *ContentReader) readSingleFile(filePath string) {
	fd, err := os.Open(filePath)
	if err != nil {
		log.Error(err.Error())
		cr.errorsQueue.AddError(errorutils.CheckError(err))
		return
	}
	defer func() {
		err = fd.Close()
		if err != nil {
			log.Error(err.Error())
			cr.errorsQueue.AddError(errorutils.CheckError(err))
		}
	}()
	br := bufio.NewReaderSize(fd, 65536)
	dec := json.NewDecoder(br)
	err = findDecoderTargetPosition(dec, cr.arrayKey, true)
	if err != nil {
		if err == io.EOF {
			cr.errorsQueue.AddError(errorutils.CheckErrorf(cr.arrayKey + " not found"))
			return
		}
		cr.errorsQueue.AddError(err)
		log.Error(err.Error())
		return
	}
	for dec.More() {
		var ResultItem map[string]interface{}
		err := dec.Decode(&ResultItem)
		if err != nil {
			log.Error(err)
			cr.errorsQueue.AddError(errorutils.CheckError(err))
			return
		}
		cr.dataChannel <- ResultItem
	}
}

func (cr *ContentReader) GetError() error {
	return cr.errorsQueue.GetError()
}

// Search and set the decoder's position at the desired key in the JSON file.
// If the desired key is not found, return io.EOF.
func findDecoderTargetPosition(dec *json.Decoder, target string, isArray bool) error {
	for dec.More() {
		// Token returns the next JSON token in the input stream.
		t, err := dec.Token()
		if err != nil {
			return errorutils.CheckError(err)
		}
		if t == target {
			if isArray {
				// Skip '['
				_, err = dec.Token()
			}
			return errorutils.CheckError(err)
		}
	}
	return nil
}

func MergeReaders(arr []*ContentReader, arrayKey string) (contentReader *ContentReader, err error) {
	cw, err := NewContentWriter(arrayKey, true, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		e := cw.Close()
		if err == nil {
			err = e
		}
	}()
	for _, cr := range arr {
		for item := new(interface{}); cr.NextRecord(item) == nil; item = new(interface{}) {
			cw.Write(*item)
		}
		if err := cr.GetError(); err != nil {
			return nil, err
		}
	}
	contentReader = NewContentReader(cw.GetFilePath(), arrayKey)
	return contentReader, nil
}
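// exampleMergeAndCount is a usage sketch and is not part of the original file. It merges
// several readers that share the same array key into a single reader backed by a new file,
// then uses Length to drain and count the merged records. The "results" array key is an
// arbitrary example value.
func exampleMergeAndCount(readers []*ContentReader) (count int, err error) {
	merged, err := MergeReaders(readers, "results")
	if err != nil {
		return 0, err
	}
	defer func() {
		// Close deletes the temporary file created by MergeReaders.
		if closeErr := merged.Close(); err == nil {
			err = closeErr
		}
	}()
	// Length reads the whole reader once, caches the count, and resets it for reuse.
	return merged.Length()
}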
// Sort a content-reader in the required order (ascending or descending).
// Performs a merge-sort on the reader, splitting it into multiple readers of size 'utils.MaxBufferSize'.
// Sort each of the split readers, and merge them into a single sorted reader.
// In case of multiple items with the same key, all the items will appear in the sorted reader, but their order is not guaranteed to be preserved.
func SortContentReader(readerRecord SortableContentItem, reader *ContentReader, ascendingOrder bool) (*ContentReader, error) {
	getSortKeyFunc := func(record interface{}) (string, error) {
		// Get the expected record type from the reader.
		recordType := reflect.ValueOf(readerRecord).Type()
		recordItem := (reflect.New(recordType)).Interface()
		err := ConvertToStruct(record, &recordItem)
		if err != nil {
			return "", err
		}
		contentItem, ok := recordItem.(SortableContentItem)
		if !ok {
			return "", errorutils.CheckErrorf("attempting to sort a content-reader with unsortable items")
		}
		return contentItem.GetSortKey(), nil
	}
	return SortContentReaderByCalculatedKey(reader, getSortKeyFunc, ascendingOrder)
}

type keyCalculationFunc func(interface{}) (string, error)

type SortRecord struct {
	Key    string      `json:"key,omitempty"`
	Record interface{} `json:"record,omitempty"`
}

func (sr SortRecord) GetSortKey() string {
	return sr.Key
}

// Sort a ContentReader according to a key generated by getKeyFunc.
// getKeyFunc gets an item from the reader and returns the key of the item.
// Attention! In case of multiple items with the same key, only the first item in the original reader will appear in the sorted one! The other items will be removed.
// Also note that the order of the fields inside the objects might change.
func SortContentReaderByCalculatedKey(reader *ContentReader, getKeyFunc keyCalculationFunc, ascendingOrder bool) (contentReader *ContentReader, err error) {
	var sortedReaders []*ContentReader
	defer func() {
		for _, r := range sortedReaders {
			e := r.Close()
			if err == nil {
				err = e
			}
		}
	}()

	// Split the reader into multiple sorted readers of size 'utils.MaxBufferSize'.
	sortedReaders, err = splitReaderToSortedBufferSizeReadersByCalculatedKey(reader, getKeyFunc, ascendingOrder)
	if err != nil {
		return nil, err
	}

	// Merge the sorted readers.
	return mergeSortedReadersByCalculatedKey(sortedReaders, ascendingOrder)
}
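// exampleSortByPathKey is a usage sketch and is not part of the original file. It sorts a
// reader of generic JSON objects in ascending order by their "path" field using
// SortContentReaderByCalculatedKey. The "path" field name is an arbitrary example; records
// without it are keyed by the empty string, and duplicate keys are dropped as documented above.
func exampleSortByPathKey(reader *ContentReader) (*ContentReader, error) {
	getPathKey := func(record interface{}) (string, error) {
		// Round-trip the raw record through JSON to read the field of interest.
		recordMap := make(map[string]interface{})
		if err := ConvertToStruct(record, &recordMap); err != nil {
			return "", err
		}
		path, _ := recordMap["path"].(string)
		return path, nil
	}
	return SortContentReaderByCalculatedKey(reader, getPathKey, true)
}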
// Split the reader to multiple readers of size 'utils.MaxBufferSize' to prevent memory overflow.
// Sort each split-reader content according to the provided 'ascendingOrder'.
func splitReaderToSortedBufferSizeReadersByCalculatedKey(reader *ContentReader, getKeyFunc keyCalculationFunc, ascendingOrder bool) ([]*ContentReader, error) {
	var splitReaders []*ContentReader

	// Split and sort.
	keysToContentItems := make(map[string]SortableContentItem)
	allKeys := make([]string, 0, utils.MaxBufferSize)
	for newRecord := new(interface{}); reader.NextRecord(newRecord) == nil; newRecord = new(interface{}) {
		sortKey, err := getKeyFunc(newRecord)
		if err != nil {
			return nil, err
		}

		if _, exist := keysToContentItems[sortKey]; !exist {
			recordWrapper := &SortRecord{Key: sortKey, Record: newRecord}
			keysToContentItems[sortKey] = recordWrapper
			allKeys = append(allKeys, sortKey)
			if len(allKeys) == utils.MaxBufferSize {
				sortedFile, err := SortAndSaveBufferToFile(keysToContentItems, allKeys, ascendingOrder)
				if err != nil {
					return nil, err
				}
				splitReaders = append(splitReaders, sortedFile)
				keysToContentItems = make(map[string]SortableContentItem)
				allKeys = make([]string, 0, utils.MaxBufferSize)
			}
		}
	}
	if err := reader.GetError(); err != nil {
		return nil, err
	}
	reader.Reset()
	if len(allKeys) > 0 {
		sortedFile, err := SortAndSaveBufferToFile(keysToContentItems, allKeys, ascendingOrder)
		if err != nil {
			return nil, err
		}
		splitReaders = append(splitReaders, sortedFile)
	}

	return splitReaders, nil
}

func mergeSortedReadersByCalculatedKey(sortedReaders []*ContentReader, ascendingOrder bool) (contentReader *ContentReader, err error) {
	if len(sortedReaders) == 0 {
		contentReader = NewEmptyContentReader(DefaultKey)
		return contentReader, nil
	}
	resultWriter, err := NewContentWriter(DefaultKey, true, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		e := resultWriter.Close()
		if err == nil {
			err = e
		}
	}()
	currentContentItem := make([]*SortRecord, len(sortedReaders))
	sortedFilesClone := make([]*ContentReader, len(sortedReaders))
	copy(sortedFilesClone, sortedReaders)

	for {
		var candidateToWrite *SortRecord
		smallestIndex := 0
		for i := 0; i < len(sortedFilesClone); i++ {
			if currentContentItem[i] == nil && sortedFilesClone[i] != nil {
				record := new(SortRecord)
				if err := sortedFilesClone[i].NextRecord(record); nil != err {
					sortedFilesClone[i] = nil
					continue
				}
				currentContentItem[i] = record
			}

			var candidateKey, currentKey string
			if candidateToWrite != nil && currentContentItem[i] != nil {
				candidateKey = candidateToWrite.Key
				currentKey = currentContentItem[i].Key

				// If there are two items with the same key - the second one will be removed
				if candidateKey == currentKey {
					currentContentItem[i] = nil
				}
			}
			if candidateToWrite == nil || (currentContentItem[i] != nil && compareStrings(candidateKey, currentKey, ascendingOrder)) {
				candidateToWrite = currentContentItem[i]
				smallestIndex = i
			}
		}
		if candidateToWrite == nil {
			break
		}
		resultWriter.Write(candidateToWrite.Record)
		currentContentItem[smallestIndex] = nil
	}
	contentReader = NewContentReader(resultWriter.GetFilePath(), resultWriter.GetArrayKey())
	return contentReader, nil
}
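// exampleSortableItem is a hypothetical record type, not part of the original file, used to
// illustrate the SortableContentItem-based entry points. It assumes SortableContentItem only
// requires GetSortKey() string, as implemented by SortRecord above.
type exampleSortableItem struct {
	Name string `json:"name,omitempty"`
}

func (item exampleSortableItem) GetSortKey() string {
	return item.Name
}

// exampleSortAndMerge is a usage sketch: it sorts each reader by the record's sort key and
// then merges the sorted readers into one, both in ascending order.
func exampleSortAndMerge(readers []*ContentReader) (*ContentReader, error) {
	sortedReaders := make([]*ContentReader, 0, len(readers))
	for _, reader := range readers {
		sorted, err := SortContentReader(exampleSortableItem{}, reader, true)
		if err != nil {
			return nil, err
		}
		sortedReaders = append(sortedReaders, sorted)
	}
	return MergeSortedReaders(exampleSortableItem{}, sortedReaders, true)
}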
// Merge a slice of sorted content-readers into a single sorted content-reader.
func MergeSortedReaders(readerRecord SortableContentItem, sortedReaders []*ContentReader, ascendingOrder bool) (contentReader *ContentReader, err error) {
	if len(sortedReaders) == 0 {
		return NewEmptyContentReader(DefaultKey), nil
	}
	resultWriter, err := NewContentWriter(DefaultKey, true, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		e := resultWriter.Close()
		if err == nil {
			err = e
		}
	}()

	// Get the expected record type from the reader.
	value := reflect.ValueOf(readerRecord)
	valueType := value.Type()

	currentContentItem := make([]*SortableContentItem, len(sortedReaders))
	sortedFilesClone := make([]*ContentReader, len(sortedReaders))
	copy(sortedFilesClone, sortedReaders)

	for {
		var candidateToWrite *SortableContentItem
		smallestIndex := 0
		for i := 0; i < len(sortedFilesClone); i++ {
			if currentContentItem[i] == nil && sortedFilesClone[i] != nil {
				temp := (reflect.New(valueType)).Interface()
				if err := sortedFilesClone[i].NextRecord(temp); nil != err {
					sortedFilesClone[i] = nil
					continue
				}
				// Expect to receive 'SortableContentItem'.
				contentItem, ok := (temp).(SortableContentItem)
				if !ok {
					return nil, errorutils.CheckErrorf("Attempting to sort a content-reader with unsortable items.")
				}
				currentContentItem[i] = &contentItem
			}

			if candidateToWrite == nil || (currentContentItem[i] != nil && compareStrings((*candidateToWrite).GetSortKey(),
				(*currentContentItem[i]).GetSortKey(), ascendingOrder)) {
				candidateToWrite = currentContentItem[i]
				smallestIndex = i
			}
		}
		if candidateToWrite == nil {
			break
		}
		resultWriter.Write(*candidateToWrite)
		currentContentItem[smallestIndex] = nil
	}
	contentReader = NewContentReader(resultWriter.GetFilePath(), resultWriter.GetArrayKey())
	return contentReader, nil
}

func compareStrings(src, against string, ascendingOrder bool) bool {
	if ascendingOrder {
		return src > against
	}
	return src < against
}

func SortAndSaveBufferToFile(keysToContentItems map[string]SortableContentItem, allKeys []string, increasingOrder bool) (contentReader *ContentReader, err error) {
	if len(allKeys) == 0 {
		return nil, nil
	}
	writer, err := NewContentWriter(DefaultKey, true, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		e := writer.Close()
		if err == nil {
			err = e
		}
	}()
	if increasingOrder {
		sort.Strings(allKeys)
	} else {
		sort.Sort(sort.Reverse(sort.StringSlice(allKeys)))
	}
	for _, v := range allKeys {
		writer.Write(keysToContentItems[v])
	}
	contentReader = NewContentReader(writer.GetFilePath(), writer.GetArrayKey())
	return contentReader, nil
}

func ConvertToStruct(record, recordOutput interface{}) error {
	data, err := json.Marshal(record)
	if errorutils.CheckError(err) != nil {
		return err
	}
	err = errorutils.CheckError(json.Unmarshal(data, recordOutput))
	return err
}
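// exampleConvertRecord is a usage sketch and is not part of the original file. It shows how
// ConvertToStruct round-trips a generic record through JSON into a typed value; the "name"
// field below is an arbitrary example.
func exampleConvertRecord(record map[string]interface{}) (string, error) {
	out := struct {
		Name string `json:"name,omitempty"`
	}{}
	if err := ConvertToStruct(record, &out); err != nil {
		return "", err
	}
	return out.Name, nil
}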