github.com/observiq/carbon@v0.9.11-0.20200820160507-1b872e368a5e/operator/builtin/input/file/read_to_end.go (about) 1 package file 2 3 import ( 4 "bufio" 5 "context" 6 "fmt" 7 "os" 8 "path/filepath" 9 10 "github.com/observiq/carbon/entry" 11 "github.com/observiq/carbon/errors" 12 "github.com/observiq/carbon/operator/helper" 13 "go.uber.org/zap" 14 "golang.org/x/text/encoding" 15 "golang.org/x/text/transform" 16 ) 17 18 // ReadToEnd will read entries from a file and send them to the outputs of an input operator 19 func ReadToEnd( 20 ctx context.Context, 21 path string, 22 startOffset int64, 23 lastSeenFileSize int64, 24 messenger fileUpdateMessenger, 25 splitFunc bufio.SplitFunc, 26 filePathField entry.Field, 27 fileNameField entry.Field, 28 inputOperator helper.InputOperator, 29 maxLogSize int, 30 encoding encoding.Encoding, 31 ) error { 32 select { 33 case <-ctx.Done(): 34 return nil 35 default: 36 } 37 38 file, err := os.Open(path) 39 if err != nil { 40 return err 41 } 42 defer file.Close() 43 44 stat, err := file.Stat() 45 if err != nil { 46 return err 47 } 48 messenger.SetLastSeenFileSize(stat.Size()) 49 50 // Start at the beginning if the file has been truncated 51 if stat.Size() < startOffset { 52 startOffset = 0 53 messenger.SetOffset(0) 54 } 55 56 _, err = file.Seek(startOffset, 0) 57 if err != nil { 58 return fmt.Errorf("seek file: %s", err) 59 } 60 61 scanner := NewPositionalScanner(file, maxLogSize, startOffset, splitFunc) 62 63 // Make a large, reusable buffer for transforming 64 decoder := encoding.NewDecoder() 65 decodeBuffer := make([]byte, 16384) 66 67 fileName := filepath.Base(file.Name()) 68 emit := func(msgBuf []byte) { 69 decoder.Reset() 70 var nDst int 71 for { 72 nDst, _, err = decoder.Transform(decodeBuffer, msgBuf, true) 73 if err != nil && err == transform.ErrShortDst { 74 decodeBuffer = make([]byte, len(decodeBuffer)*2) 75 continue 76 } else if err != nil { 77 inputOperator.Errorw("failed to transform encoding", zap.Error(err)) 78 return 79 } 80 break 81 } 82 83 e, err := inputOperator.NewEntry(string(decodeBuffer[:nDst])) 84 if err != nil { 85 inputOperator.Errorw("Failed to create entry", zap.Error(err)) 86 return 87 } 88 89 e.Set(filePathField, path) 90 e.Set(fileNameField, fileName) 91 inputOperator.Write(ctx, e) 92 } 93 94 // Iterate over the tokenized file, emitting entries as we go 95 for { 96 select { 97 case <-ctx.Done(): 98 return nil 99 default: 100 } 101 102 ok := scanner.Scan() 103 if !ok { 104 if err := scanner.Err(); err == bufio.ErrTooLong { 105 return errors.NewError("log entry too large", "increase max_log_size or ensure that multiline regex patterns terminate") 106 } else if err != nil { 107 return errors.Wrap(err, "scanner error") 108 } 109 break 110 } 111 112 emit(scanner.Bytes()) 113 messenger.SetOffset(scanner.Pos()) 114 } 115 116 // If we're not at the end of the file, and we haven't 117 // advanced since last cycle, read the rest of the file as an entry 118 if scanner.Pos() < stat.Size() && scanner.Pos() == startOffset && lastSeenFileSize == stat.Size() { 119 _, err := file.Seek(scanner.Pos(), 0) 120 if err != nil { 121 return errors.Wrap(err, "seeking for trailing entry") 122 } 123 124 msgBuf := make([]byte, stat.Size()-scanner.Pos()) 125 n, err := file.Read(msgBuf) 126 if err != nil { 127 return errors.Wrap(err, "reading trailing entry") 128 } 129 emit(msgBuf[:n]) 130 messenger.SetOffset(scanner.Pos() + int64(n)) 131 } 132 133 return nil 134 }