github.com/lingyao2333/mo-zero@v1.4.1/core/filex/lookup.go (about)

     1  package filex
     2  
     3  import (
     4  	"io"
     5  	"os"
     6  )
     7  
     8  // OffsetRange represents a content block of a file.
     9  type OffsetRange struct {
    10  	File  string
    11  	Start int64
    12  	Stop  int64
    13  }
    14  
    15  // SplitLineChunks splits file into chunks.
    16  // The whole line are guaranteed to be split in the same chunk.
    17  func SplitLineChunks(filename string, chunks int) ([]OffsetRange, error) {
    18  	info, err := os.Stat(filename)
    19  	if err != nil {
    20  		return nil, err
    21  	}
    22  
    23  	if chunks <= 1 {
    24  		return []OffsetRange{
    25  			{
    26  				File:  filename,
    27  				Start: 0,
    28  				Stop:  info.Size(),
    29  			},
    30  		}, nil
    31  	}
    32  
    33  	file, err := os.Open(filename)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  	defer file.Close()
    38  
    39  	var ranges []OffsetRange
    40  	var offset int64
    41  	// avoid the last chunk too few bytes
    42  	preferSize := info.Size()/int64(chunks) + 1
    43  	for {
    44  		if offset+preferSize >= info.Size() {
    45  			ranges = append(ranges, OffsetRange{
    46  				File:  filename,
    47  				Start: offset,
    48  				Stop:  info.Size(),
    49  			})
    50  			break
    51  		}
    52  
    53  		offsetRange, err := nextRange(file, offset, offset+preferSize)
    54  		if err != nil {
    55  			return nil, err
    56  		}
    57  
    58  		ranges = append(ranges, offsetRange)
    59  		if offsetRange.Stop < info.Size() {
    60  			offset = offsetRange.Stop
    61  		} else {
    62  			break
    63  		}
    64  	}
    65  
    66  	return ranges, nil
    67  }
    68  
    69  func nextRange(file *os.File, start, stop int64) (OffsetRange, error) {
    70  	offset, err := skipPartialLine(file, stop)
    71  	if err != nil {
    72  		return OffsetRange{}, err
    73  	}
    74  
    75  	return OffsetRange{
    76  		File:  file.Name(),
    77  		Start: start,
    78  		Stop:  offset,
    79  	}, nil
    80  }
    81  
    82  func skipPartialLine(file *os.File, offset int64) (int64, error) {
    83  	for {
    84  		skipBuf := make([]byte, bufSize)
    85  		n, err := file.ReadAt(skipBuf, offset)
    86  		if err != nil && err != io.EOF {
    87  			return 0, err
    88  		}
    89  		if n == 0 {
    90  			return 0, io.EOF
    91  		}
    92  
    93  		for i := 0; i < n; i++ {
    94  			if skipBuf[i] != '\r' && skipBuf[i] != '\n' {
    95  				offset++
    96  			} else {
    97  				for ; i < n; i++ {
    98  					if skipBuf[i] == '\r' || skipBuf[i] == '\n' {
    99  						offset++
   100  					} else {
   101  						return offset, nil
   102  					}
   103  				}
   104  				return offset, nil
   105  			}
   106  		}
   107  	}
   108  }