github.com/btwiuse/jiri@v0.0.0-20191125065820-53353bcfef54/textutil/rune.go (about) 1 // Copyright 2015 The Vanadium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package textutil 6 7 import ( 8 "bytes" 9 ) 10 11 // TODO(toddw): Add UTF16 support. 12 13 const ( 14 EOF = rune(-1) // Indicates the end of a rune stream. 15 LineSeparator = '\u2028' // Unicode line separator rune. 16 ParagraphSeparator = '\u2029' // Unicode paragraph separator rune. 17 ) 18 19 // RuneEncoder is the interface to an encoder of a stream of runes into 20 // bytes.Buffer. 21 type RuneEncoder interface { 22 // Encode encodes r into buf. 23 Encode(r rune, buf *bytes.Buffer) 24 } 25 26 // RuneChunkDecoder is the interface to a decoder of a stream of encoded runes 27 // that may be arbitrarily chunked. 28 // 29 // Implementations of RuneChunkDecoder are commonly used to implement io.Writer 30 // wrappers, to handle buffering when chunk boundaries may occur in the middle 31 // of an encoded rune. 32 type RuneChunkDecoder interface { 33 // DecodeRune returns the next rune in chunk, and its width in bytes. If 34 // chunk represents a partial rune, the chunk is buffered and returns EOF and 35 // the size of the chunk. Subsequent calls to DecodeRune will combine 36 // previously buffered data when decoding. 37 DecodeRune(chunk []byte) (r rune, n int) 38 // FlushRune returns the next buffered rune. Returns EOF when all buffered 39 // data is returned. 40 FlushRune() rune 41 } 42 43 // WriteRuneChunk is a helper that repeatedly calls d.DecodeRune(chunk) until 44 // EOF, calling fn for every rune that is decoded. Returns the number of bytes 45 // in data that were successfully processed. If fn returns an error, 46 // WriteRuneChunk will return with that error, without processing any more data. 47 // 48 // This is a convenience for implementing io.Writer, given a RuneChunkDecoder. 49 func WriteRuneChunk(d RuneChunkDecoder, fn func(rune) error, chunk []byte) (int, error) { 50 pos := 0 51 for pos < len(chunk) { 52 r, size := d.DecodeRune(chunk[pos:]) 53 pos += size 54 if r == EOF { 55 break 56 } 57 if err := fn(r); err != nil { 58 return pos, err 59 } 60 } 61 return pos, nil 62 } 63 64 // FlushRuneChunk is a helper that repeatedly calls d.FlushRune until EOF, 65 // calling fn for every rune that is decoded. If fn returns an error, Flush 66 // will return with that error, without processing any more data. 67 // 68 // This is a convenience for implementing an additional Flush() call on an 69 // implementation of io.Writer, given a RuneChunkDecoder. 70 func FlushRuneChunk(d RuneChunkDecoder, fn func(rune) error) error { 71 for { 72 r := d.FlushRune() 73 if r == EOF { 74 return nil 75 } 76 if err := fn(r); err != nil { 77 return err 78 } 79 } 80 } 81 82 // bytePos and runePos distinguish positions that are used in either domain; 83 // we're trying to avoid silly mistakes like adding a bytePos to a runePos. 84 type bytePos int 85 type runePos int 86 87 // byteRuneBuffer maintains a buffer with both byte and rune based positions. 88 type byteRuneBuffer struct { 89 enc RuneEncoder 90 buf bytes.Buffer 91 runeLen runePos 92 } 93 94 func (b *byteRuneBuffer) ByteLen() bytePos { return bytePos(b.buf.Len()) } 95 func (b *byteRuneBuffer) RuneLen() runePos { return b.runeLen } 96 func (b *byteRuneBuffer) Bytes() []byte { return b.buf.Bytes() } 97 98 func (b *byteRuneBuffer) Reset() { 99 b.buf.Reset() 100 b.runeLen = 0 101 } 102 103 // WriteRune writes r into b. 104 func (b *byteRuneBuffer) WriteRune(r rune) { 105 b.enc.Encode(r, &b.buf) 106 b.runeLen++ 107 } 108 109 // WriteString writes str into b. 110 func (b *byteRuneBuffer) WriteString(str string) { 111 for _, r := range str { 112 b.WriteRune(r) 113 } 114 } 115 116 // WriteString0Runes writes str into b, not incrementing the rune length. 117 func (b *byteRuneBuffer) WriteString0Runes(str string) { 118 for _, r := range str { 119 b.enc.Encode(r, &b.buf) 120 } 121 }