github.com/btwiuse/jiri@v0.0.0-20191125065820-53353bcfef54/textutil/rune.go (about)

     1  // Copyright 2015 The Vanadium Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package textutil
     6  
     7  import (
     8  	"bytes"
     9  )
    10  
    11  // TODO(toddw): Add UTF16 support.
    12  
    13  const (
    14  	EOF                = rune(-1) // Indicates the end of a rune stream.
    15  	LineSeparator      = '\u2028' // Unicode line separator rune.
    16  	ParagraphSeparator = '\u2029' // Unicode paragraph separator rune.
    17  )
    18  
    19  // RuneEncoder is the interface to an encoder of a stream of runes into
    20  // bytes.Buffer.
    21  type RuneEncoder interface {
    22  	// Encode encodes r into buf.
    23  	Encode(r rune, buf *bytes.Buffer)
    24  }
    25  
    26  // RuneChunkDecoder is the interface to a decoder of a stream of encoded runes
    27  // that may be arbitrarily chunked.
    28  //
    29  // Implementations of RuneChunkDecoder are commonly used to implement io.Writer
    30  // wrappers, to handle buffering when chunk boundaries may occur in the middle
    31  // of an encoded rune.
    32  type RuneChunkDecoder interface {
    33  	// DecodeRune returns the next rune in chunk, and its width in bytes.  If
    34  	// chunk represents a partial rune, the chunk is buffered and returns EOF and
    35  	// the size of the chunk.  Subsequent calls to DecodeRune will combine
    36  	// previously buffered data when decoding.
    37  	DecodeRune(chunk []byte) (r rune, n int)
    38  	// FlushRune returns the next buffered rune.  Returns EOF when all buffered
    39  	// data is returned.
    40  	FlushRune() rune
    41  }
    42  
    43  // WriteRuneChunk is a helper that repeatedly calls d.DecodeRune(chunk) until
    44  // EOF, calling fn for every rune that is decoded.  Returns the number of bytes
    45  // in data that were successfully processed.  If fn returns an error,
    46  // WriteRuneChunk will return with that error, without processing any more data.
    47  //
    48  // This is a convenience for implementing io.Writer, given a RuneChunkDecoder.
    49  func WriteRuneChunk(d RuneChunkDecoder, fn func(rune) error, chunk []byte) (int, error) {
    50  	pos := 0
    51  	for pos < len(chunk) {
    52  		r, size := d.DecodeRune(chunk[pos:])
    53  		pos += size
    54  		if r == EOF {
    55  			break
    56  		}
    57  		if err := fn(r); err != nil {
    58  			return pos, err
    59  		}
    60  	}
    61  	return pos, nil
    62  }
    63  
    64  // FlushRuneChunk is a helper that repeatedly calls d.FlushRune until EOF,
    65  // calling fn for every rune that is decoded.  If fn returns an error, Flush
    66  // will return with that error, without processing any more data.
    67  //
    68  // This is a convenience for implementing an additional Flush() call on an
    69  // implementation of io.Writer, given a RuneChunkDecoder.
    70  func FlushRuneChunk(d RuneChunkDecoder, fn func(rune) error) error {
    71  	for {
    72  		r := d.FlushRune()
    73  		if r == EOF {
    74  			return nil
    75  		}
    76  		if err := fn(r); err != nil {
    77  			return err
    78  		}
    79  	}
    80  }
    81  
    82  // bytePos and runePos distinguish positions that are used in either domain;
    83  // we're trying to avoid silly mistakes like adding a bytePos to a runePos.
    84  type bytePos int
    85  type runePos int
    86  
    87  // byteRuneBuffer maintains a buffer with both byte and rune based positions.
    88  type byteRuneBuffer struct {
    89  	enc     RuneEncoder
    90  	buf     bytes.Buffer
    91  	runeLen runePos
    92  }
    93  
    94  func (b *byteRuneBuffer) ByteLen() bytePos { return bytePos(b.buf.Len()) }
    95  func (b *byteRuneBuffer) RuneLen() runePos { return b.runeLen }
    96  func (b *byteRuneBuffer) Bytes() []byte    { return b.buf.Bytes() }
    97  
    98  func (b *byteRuneBuffer) Reset() {
    99  	b.buf.Reset()
   100  	b.runeLen = 0
   101  }
   102  
   103  // WriteRune writes r into b.
   104  func (b *byteRuneBuffer) WriteRune(r rune) {
   105  	b.enc.Encode(r, &b.buf)
   106  	b.runeLen++
   107  }
   108  
   109  // WriteString writes str into b.
   110  func (b *byteRuneBuffer) WriteString(str string) {
   111  	for _, r := range str {
   112  		b.WriteRune(r)
   113  	}
   114  }
   115  
   116  // WriteString0Runes writes str into b, not incrementing the rune length.
   117  func (b *byteRuneBuffer) WriteString0Runes(str string) {
   118  	for _, r := range str {
   119  		b.enc.Encode(r, &b.buf)
   120  	}
   121  }