github.com/observiq/carbon@v0.9.11-0.20200820160507-1b872e368a5e/operator/builtin/input/file/read_to_end.go (about)

     1  package file
     2  
     3  import (
     4  	"bufio"
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  	"path/filepath"
     9  
    10  	"github.com/observiq/carbon/entry"
    11  	"github.com/observiq/carbon/errors"
    12  	"github.com/observiq/carbon/operator/helper"
    13  	"go.uber.org/zap"
    14  	"golang.org/x/text/encoding"
    15  	"golang.org/x/text/transform"
    16  )
    17  
    18  // ReadToEnd will read entries from a file and send them to the outputs of an input operator
    19  func ReadToEnd(
    20  	ctx context.Context,
    21  	path string,
    22  	startOffset int64,
    23  	lastSeenFileSize int64,
    24  	messenger fileUpdateMessenger,
    25  	splitFunc bufio.SplitFunc,
    26  	filePathField entry.Field,
    27  	fileNameField entry.Field,
    28  	inputOperator helper.InputOperator,
    29  	maxLogSize int,
    30  	encoding encoding.Encoding,
    31  ) error {
    32  	select {
    33  	case <-ctx.Done():
    34  		return nil
    35  	default:
    36  	}
    37  
    38  	file, err := os.Open(path)
    39  	if err != nil {
    40  		return err
    41  	}
    42  	defer file.Close()
    43  
    44  	stat, err := file.Stat()
    45  	if err != nil {
    46  		return err
    47  	}
    48  	messenger.SetLastSeenFileSize(stat.Size())
    49  
    50  	// Start at the beginning if the file has been truncated
    51  	if stat.Size() < startOffset {
    52  		startOffset = 0
    53  		messenger.SetOffset(0)
    54  	}
    55  
    56  	_, err = file.Seek(startOffset, 0)
    57  	if err != nil {
    58  		return fmt.Errorf("seek file: %s", err)
    59  	}
    60  
    61  	scanner := NewPositionalScanner(file, maxLogSize, startOffset, splitFunc)
    62  
    63  	// Make a large, reusable buffer for transforming
    64  	decoder := encoding.NewDecoder()
    65  	decodeBuffer := make([]byte, 16384)
    66  
    67  	fileName := filepath.Base(file.Name())
    68  	emit := func(msgBuf []byte) {
    69  		decoder.Reset()
    70  		var nDst int
    71  		for {
    72  			nDst, _, err = decoder.Transform(decodeBuffer, msgBuf, true)
    73  			if err != nil && err == transform.ErrShortDst {
    74  				decodeBuffer = make([]byte, len(decodeBuffer)*2)
    75  				continue
    76  			} else if err != nil {
    77  				inputOperator.Errorw("failed to transform encoding", zap.Error(err))
    78  				return
    79  			}
    80  			break
    81  		}
    82  
    83  		e, err := inputOperator.NewEntry(string(decodeBuffer[:nDst]))
    84  		if err != nil {
    85  			inputOperator.Errorw("Failed to create entry", zap.Error(err))
    86  			return
    87  		}
    88  
    89  		e.Set(filePathField, path)
    90  		e.Set(fileNameField, fileName)
    91  		inputOperator.Write(ctx, e)
    92  	}
    93  
    94  	// Iterate over the tokenized file, emitting entries as we go
    95  	for {
    96  		select {
    97  		case <-ctx.Done():
    98  			return nil
    99  		default:
   100  		}
   101  
   102  		ok := scanner.Scan()
   103  		if !ok {
   104  			if err := scanner.Err(); err == bufio.ErrTooLong {
   105  				return errors.NewError("log entry too large", "increase max_log_size or ensure that multiline regex patterns terminate")
   106  			} else if err != nil {
   107  				return errors.Wrap(err, "scanner error")
   108  			}
   109  			break
   110  		}
   111  
   112  		emit(scanner.Bytes())
   113  		messenger.SetOffset(scanner.Pos())
   114  	}
   115  
   116  	// If we're not at the end of the file, and we haven't
   117  	// advanced since last cycle, read the rest of the file as an entry
   118  	if scanner.Pos() < stat.Size() && scanner.Pos() == startOffset && lastSeenFileSize == stat.Size() {
   119  		_, err := file.Seek(scanner.Pos(), 0)
   120  		if err != nil {
   121  			return errors.Wrap(err, "seeking for trailing entry")
   122  		}
   123  
   124  		msgBuf := make([]byte, stat.Size()-scanner.Pos())
   125  		n, err := file.Read(msgBuf)
   126  		if err != nil {
   127  			return errors.Wrap(err, "reading trailing entry")
   128  		}
   129  		emit(msgBuf[:n])
   130  		messenger.SetOffset(scanner.Pos() + int64(n))
   131  	}
   132  
   133  	return nil
   134  }