// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package hcl

import (
	"bufio"
	"bytes"

	"github.com/apparentlymart/go-textseg/v15/textseg"
)

// RangeScanner is a helper that will scan over a buffer using a bufio.SplitFunc
// and visit a source range for each token matched.
//
// For example, this can be used with bufio.ScanLines to find the source range
// for each line in the file, skipping over the actual newline characters, which
// may be useful when printing source code snippets as part of diagnostic
// messages.
//
// The line and column information in the returned ranges is produced by
// counting newline characters and grapheme clusters respectively, which
// mimics the behavior we expect from a parser when producing ranges.
type RangeScanner struct {
	filename string          // filename reported in every produced Range
	b        []byte          // the buffer being scanned
	cb       bufio.SplitFunc // token-splitting callback supplied by the caller

	pos Pos    // position of next byte to process in b
	cur Range  // latest range
	tok []byte // slice of b that is covered by cur
	err error  // error from last scan, if any
}

// NewRangeScanner creates a new RangeScanner for the given buffer, producing
// ranges for the given filename.
//
// Since ranges have grapheme-cluster granularity rather than byte granularity,
// the scanner will produce incorrect results if the given SplitFunc creates
// tokens between grapheme cluster boundaries. In particular, it is incorrect
// to use RangeScanner with bufio.ScanRunes because it will produce tokens
// around individual UTF-8 sequences, which will split any multi-sequence
// grapheme clusters.
44 func NewRangeScanner(b []byte, filename string, cb bufio.SplitFunc) *RangeScanner { 45 return NewRangeScannerFragment(b, filename, InitialPos, cb) 46 } 47 48 // NewRangeScannerFragment is like NewRangeScanner but the ranges it produces 49 // will be offset by the given starting position, which is appropriate for 50 // sub-slices of a file, whereas NewRangeScanner assumes it is scanning an 51 // entire file. 52 func NewRangeScannerFragment(b []byte, filename string, start Pos, cb bufio.SplitFunc) *RangeScanner { 53 return &RangeScanner{ 54 filename: filename, 55 b: b, 56 cb: cb, 57 pos: start, 58 } 59 } 60 61 func (sc *RangeScanner) Scan() bool { 62 if sc.pos.Byte >= len(sc.b) || sc.err != nil { 63 // All done 64 return false 65 } 66 67 // Since we're operating on an in-memory buffer, we always pass the whole 68 // remainder of the buffer to our SplitFunc and set isEOF to let it know 69 // that it has the whole thing. 70 advance, token, err := sc.cb(sc.b[sc.pos.Byte:], true) 71 72 // Since we are setting isEOF to true this should never happen, but 73 // if it does we will just abort and assume the SplitFunc is misbehaving. 74 if advance == 0 && token == nil && err == nil { 75 return false 76 } 77 78 if err != nil { 79 sc.err = err 80 sc.cur = Range{ 81 Filename: sc.filename, 82 Start: sc.pos, 83 End: sc.pos, 84 } 85 sc.tok = nil 86 return false 87 } 88 89 sc.tok = token 90 start := sc.pos 91 end := sc.pos 92 new := sc.pos 93 94 // adv is similar to token but it also includes any subsequent characters 95 // we're being asked to skip over by the SplitFunc. 96 // adv is a slice covering any additional bytes we are skipping over, based 97 // on what the SplitFunc told us to do with advance. 98 adv := sc.b[sc.pos.Byte : sc.pos.Byte+advance] 99 100 // We now need to scan over our token to count the grapheme clusters 101 // so we can correctly advance Column, and count the newlines so we 102 // can correctly advance Line. 
103 advR := bytes.NewReader(adv) 104 gsc := bufio.NewScanner(advR) 105 advanced := 0 106 gsc.Split(textseg.ScanGraphemeClusters) 107 for gsc.Scan() { 108 gr := gsc.Bytes() 109 new.Byte += len(gr) 110 new.Column++ 111 112 // We rely here on the fact that \r\n is considered a grapheme cluster 113 // and so we don't need to worry about miscounting additional lines 114 // on files with Windows-style line endings. 115 if len(gr) != 0 && (gr[0] == '\r' || gr[0] == '\n') { 116 new.Column = 1 117 new.Line++ 118 } 119 120 if advanced < len(token) { 121 // If we've not yet found the end of our token then we'll 122 // also push our "end" marker along. 123 // (if advance > len(token) then we'll stop moving "end" early 124 // so that the caller only sees the range covered by token.) 125 end = new 126 } 127 advanced += len(gr) 128 } 129 130 sc.cur = Range{ 131 Filename: sc.filename, 132 Start: start, 133 End: end, 134 } 135 sc.pos = new 136 return true 137 } 138 139 // Range returns a range that covers the latest token obtained after a call 140 // to Scan returns true. 141 func (sc *RangeScanner) Range() Range { 142 return sc.cur 143 } 144 145 // Bytes returns the slice of the input buffer that is covered by the range 146 // that would be returned by Range. 147 func (sc *RangeScanner) Bytes() []byte { 148 return sc.tok 149 } 150 151 // Err can be called after Scan returns false to determine if the latest read 152 // resulted in an error, and obtain that error if so. 153 func (sc *RangeScanner) Err() error { 154 return sc.err 155 }