github.com/aretext/aretext@v1.3.0/text/tree.go (about) 1 package text 2 3 import ( 4 "fmt" 5 "io" 6 "strings" 7 "unicode/utf8" 8 9 textUtf8 "github.com/aretext/aretext/text/utf8" 10 ) 11 12 var ( 13 ErrInvalidUtf8 = fmt.Errorf("invalid UTF-8") 14 ) 15 16 // text.Tree is a data structure for representing UTF-8 text. 17 // It supports efficient insertions, deletions, and lookup by character offset and line number. 18 // It is inspired by two papers: 19 // Boehm, H. J., Atkinson, R., & Plass, M. (1995). Ropes: an alternative to strings. Software: Practice and Experience, 25(12), 1315-1330. 20 // Rao, J., & Ross, K. A. (2000, May). Making B+-trees cache conscious in main memory. In Proceedings of the 2000 ACM SIGMOD international conference on Management of data (pp. 475-486). 21 // Like a rope, the tree maintains character counts at each level to efficiently locate a character at a given offset. 22 // To use the CPU cache efficiently, all children of a node are pre-allocated in a group (what the Rao & Ross paper calls a "full" cache-sensitive B+ tree), 23 // and the parent uses offsets within the node group to identify child nodes. 24 // All nodes are carefully designed to fit as much data as possible within a 64-byte cache line. 25 type Tree struct { 26 root *innerNode 27 } 28 29 // NewTree returns a tree representing an empty string. 30 func NewTree() *Tree { 31 root := &innerNode{numKeys: 1} 32 root.child = &leafNodeGroup{numNodes: 1} 33 return &Tree{root} 34 } 35 36 // NewTreeFromReader creates a new Tree from a reader that produces UTF-8 text. 37 // This is more efficient than inserting the bytes into an empty tree. 38 // Returns an error if the bytes are invalid UTF-8. 39 func NewTreeFromReader(r io.Reader) (*Tree, error) { 40 leafGroups, err := bulkLoadIntoLeaves(r) 41 if err != nil { 42 return nil, err 43 } 44 root := buildTreeFromLeaves(leafGroups) 45 return &Tree{root}, nil 46 } 47 48 // NewTreeFromString creates a new Tree from a UTF-8 string. 49 func NewTreeFromString(s string) (*Tree, error) { 50 reader := strings.NewReader(s) 51 return NewTreeFromReader(reader) 52 } 53 54 func bulkLoadIntoLeaves(r io.Reader) ([]nodeGroup, error) { 55 v := textUtf8.NewValidator() 56 leafGroups := make([]nodeGroup, 0, 1) 57 currentGroup := &leafNodeGroup{numNodes: 1} 58 currentNode := ¤tGroup.nodes[0] 59 leafGroups = append(leafGroups, currentGroup) 60 61 var buf [1024]byte 62 for { 63 n, err := r.Read(buf[:]) 64 if err != nil && err != io.EOF { 65 return nil, err 66 } 67 68 if n == 0 { 69 break 70 } 71 72 if !v.ValidateBytes(buf[:n]) { 73 return nil, ErrInvalidUtf8 74 } 75 76 for i := 0; i < n; i++ { 77 charWidth := textUtf8.CharWidth[buf[i]] // zero for continuation bytes 78 if currentNode.numBytes+charWidth >= maxBytesPerLeaf { 79 if currentGroup.numNodes < maxNodesPerGroup { 80 currentNode = ¤tGroup.nodes[currentGroup.numNodes] 81 currentGroup.numNodes++ 82 } else { 83 newGroup := &leafNodeGroup{numNodes: 1} 84 leafGroups = append(leafGroups, newGroup) 85 newGroup.prev = currentGroup 86 currentGroup.next = newGroup 87 currentGroup = newGroup 88 currentNode = ¤tGroup.nodes[0] 89 } 90 } 91 92 currentNode.textBytes[currentNode.numBytes] = buf[i] 93 currentNode.numBytes++ 94 } 95 } 96 97 if !v.ValidateEnd() { 98 return nil, ErrInvalidUtf8 99 } 100 101 return leafGroups, nil 102 } 103 104 func buildTreeFromLeaves(leafGroups []nodeGroup) *innerNode { 105 childGroups := leafGroups 106 107 for { 108 parentGroups := make([]nodeGroup, 0, len(childGroups)/maxNodesPerGroup+1) 109 currentGroup := &innerNodeGroup{} 110 parentGroups = append(parentGroups, currentGroup) 111 112 for _, cg := range childGroups { 113 if currentGroup.numNodes == maxNodesPerGroup { 114 newGroup := &innerNodeGroup{} 115 parentGroups = append(parentGroups, newGroup) 116 currentGroup = newGroup 117 } 118 119 innerNode := ¤tGroup.nodes[currentGroup.numNodes] 120 innerNode.child = cg 121 innerNode.recalculateChildKeys() 122 currentGroup.numNodes++ 123 } 124 125 if len(parentGroups) == 1 { 126 root := innerNode{child: parentGroups[0]} 127 root.recalculateChildKeys() 128 return &root 129 } 130 131 childGroups = parentGroups 132 } 133 } 134 135 // NumChars returns the total number of characters (runes) in the tree. 136 func (t *Tree) NumChars() uint64 { 137 return t.root.numChars() 138 } 139 140 // NumLines returns the total number of lines in the tree. 141 func (t *Tree) NumLines() uint64 { 142 return t.root.numNewlines() + 1 143 } 144 145 // InsertAtPosition inserts a UTF-8 character at the specified position (0-indexed). 146 // If charPos is past the end of the text, it will be appended at the end. 147 // Returns an error if c is not a valid UTF-8 character. 148 func (t *Tree) InsertAtPosition(charPos uint64, c rune) error { 149 invalidateKeys, splitNode, err := t.root.insertAtPosition(charPos, c) 150 if err != nil { 151 return err 152 } 153 154 if invalidateKeys { 155 t.root.recalculateChildKeys() 156 } 157 158 if splitNode != nil { 159 newGroup := innerNodeGroup{numNodes: 2} 160 newGroup.nodes[0] = *t.root 161 newGroup.nodes[1] = *splitNode 162 163 t.root = &innerNode{child: &newGroup} 164 t.root.recalculateChildKeys() 165 } 166 167 return nil 168 } 169 170 // DeleteAtPosition removes the UTF-8 character at the specified position (0-indexed). 171 // If charPos is past the end of the text, this has no effect. 172 func (t *Tree) DeleteAtPosition(charPos uint64) (bool, rune) { 173 didDelete, _, r := t.root.deleteAtPosition(charPos) 174 return didDelete, r 175 } 176 177 // ReaderAtPosition returns a reader starting at the UTF-8 character at the specified position (0-indexed). 178 // If the position is past the end of the text, the returned reader will read zero bytes. 179 func (t *Tree) ReaderAtPosition(charPos uint64) Reader { 180 return t.root.readerAtPosition(charPos) 181 } 182 183 // ReverseReaderAtPosition returns a reverse reader starting at the specified position. 184 func (t *Tree) ReverseReaderAtPosition(charPos uint64) ReverseReader { 185 return t.root.reverseReaderAtPosition(charPos) 186 } 187 188 // LineStartPosition returns the position of the first character at the specified line (0-indexed). 189 // If the line number is greater than the maximum line number, returns one past the position of the last character. 190 func (t *Tree) LineStartPosition(lineNum uint64) uint64 { 191 if lineNum == 0 { 192 // Special case the first line, since it's the only line that doesn't immediately follow a newline character. 193 return 0 194 } 195 196 return t.root.positionAfterNewline(lineNum - 1) 197 } 198 199 // LineNumForPosition returns the line number (0-indexed) for the line containing the specified position. 200 func (t *Tree) LineNumForPosition(charPos uint64) uint64 { 201 return t.root.numNewlinesBeforePosition(charPos) 202 } 203 204 // String returns the text in the tree as a string. 205 func (t *Tree) String() string { 206 reader := t.ReaderAtPosition(0) 207 retrievedBytes, err := io.ReadAll(&reader) 208 if err != nil { 209 panic("Unexpected error reading bytes from text.Tree") 210 } 211 return string(retrievedBytes) 212 } 213 214 const maxKeysPerNode = 64 215 const maxNodesPerGroup = maxKeysPerNode 216 const maxBytesPerLeaf = 63 217 218 // nodeGroup is either an inner node group or a leaf node group. 219 type nodeGroup interface { 220 keys() []indexKey 221 insertAtPosition(nodeIdx uint64, charPos uint64, c rune) (invalidateKeys bool, splitNodeGroup nodeGroup, err error) 222 deleteAtPosition(nodeIdx uint64, charPos uint64) (didDelete, wasNewline bool, r rune) 223 readerAtPosition(nodeIdx uint64, charPos uint64) Reader 224 reverseReaderAtPosition(nodeIdx uint64, charPos uint64) ReverseReader 225 positionAfterNewline(nodeIdx uint64, newlineIdx uint64) uint64 226 numNewlinesBeforePosition(nodeIdx uint64, charPos uint64) uint64 227 } 228 229 // indexKey is used to navigate from an inner node to the child node containing a particular line or character offset. 230 type indexKey struct { 231 232 // Number of UTF-8 characters in a subtree. 233 numChars uint64 234 235 // Number of newline characters in a subtree. 236 numNewlines uint64 237 } 238 239 // innerNodeGroup is a group of inner nodes referenced by a parent inner node. 240 type innerNodeGroup struct { 241 numNodes uint64 242 nodes [maxNodesPerGroup]innerNode 243 } 244 245 func (g *innerNodeGroup) keys() []indexKey { 246 keys := make([]indexKey, g.numNodes) 247 for i := uint64(0); i < g.numNodes; i++ { 248 keys[i] = g.nodes[i].key() 249 } 250 return keys 251 } 252 253 func (g *innerNodeGroup) insertAtPosition(nodeIdx uint64, charPos uint64, c rune) (invalidateKeys bool, splitNodeGroup nodeGroup, err error) { 254 _, splitNode, err := g.nodes[nodeIdx].insertAtPosition(charPos, c) 255 if err != nil { 256 return false, nil, err 257 } 258 259 if splitNode == nil { 260 return false, nil, nil 261 } 262 263 splitIdx := nodeIdx + 1 264 if g.numNodes < maxNodesPerGroup { 265 g.insertNode(splitIdx, splitNode) 266 return true, nil, nil 267 } 268 269 splitGroup := g.split() 270 if splitIdx < g.numNodes { 271 g.insertNode(splitIdx, splitNode) 272 } else { 273 splitGroup.insertNode(splitIdx-g.numNodes, splitNode) 274 } 275 276 return true, splitGroup, nil 277 } 278 279 func (g *innerNodeGroup) insertNode(nodeIdx uint64, node *innerNode) { 280 for i := int(g.numNodes); i > int(nodeIdx); i-- { 281 g.nodes[i] = g.nodes[i-1] 282 } 283 g.nodes[nodeIdx] = *node 284 g.numNodes++ 285 } 286 287 func (g *innerNodeGroup) split() *innerNodeGroup { 288 mid := g.numNodes / 2 289 splitGroup := innerNodeGroup{numNodes: g.numNodes - mid} 290 for i := uint64(0); i < splitGroup.numNodes; i++ { 291 splitGroup.nodes[i] = g.nodes[mid+i] 292 } 293 g.numNodes = mid 294 return &splitGroup 295 } 296 297 func (g *innerNodeGroup) deleteAtPosition(nodeIdx uint64, charPos uint64) (didDelete, wasNewline bool, r rune) { 298 return g.nodes[nodeIdx].deleteAtPosition(charPos) 299 } 300 301 func (g *innerNodeGroup) readerAtPosition(nodeIdx uint64, charPos uint64) Reader { 302 return g.nodes[nodeIdx].readerAtPosition(charPos) 303 } 304 305 func (g *innerNodeGroup) reverseReaderAtPosition(nodeIdx uint64, charPos uint64) ReverseReader { 306 return g.nodes[nodeIdx].reverseReaderAtPosition(charPos) 307 } 308 309 func (g *innerNodeGroup) positionAfterNewline(nodeIdx uint64, newlineIdx uint64) uint64 { 310 return g.nodes[nodeIdx].positionAfterNewline(newlineIdx) 311 } 312 313 func (g *innerNodeGroup) numNewlinesBeforePosition(nodeIdx uint64, charPos uint64) uint64 { 314 return g.nodes[nodeIdx].numNewlinesBeforePosition(charPos) 315 } 316 317 // innerNode is used to navigate to the leaf node containing a character offset or line number. 318 // 319 // +-----------------------------+ 320 // | child | numKeys | keys[64] | 321 // +-----------------------------+ 322 // 323 // 16 + 8 + 1024 = 1048 bytes 324 type innerNode struct { 325 child nodeGroup 326 numKeys uint64 327 328 // Each key corresponds to a node in the child group. 329 keys [maxKeysPerNode]indexKey 330 } 331 332 func (n *innerNode) key() indexKey { 333 nodeKey := indexKey{} 334 for i := uint64(0); i < n.numKeys; i++ { 335 key := n.keys[i] 336 nodeKey.numChars += key.numChars 337 nodeKey.numNewlines += key.numNewlines 338 } 339 return nodeKey 340 } 341 342 func (n *innerNode) numChars() uint64 { 343 numChars := uint64(0) 344 for i := uint64(0); i < n.numKeys; i++ { 345 numChars += n.keys[i].numChars 346 } 347 return numChars 348 } 349 350 func (n *innerNode) numNewlines() uint64 { 351 numNewlines := uint64(0) 352 for i := uint64(0); i < n.numKeys; i++ { 353 numNewlines += n.keys[i].numNewlines 354 } 355 return numNewlines 356 } 357 358 func (n *innerNode) recalculateChildKeys() { 359 childKeys := n.child.keys() 360 copy(n.keys[:], childKeys) 361 n.numKeys = uint64(len(childKeys)) 362 } 363 364 func (n *innerNode) insertAtPosition(charPos uint64, c rune) (invalidateKeys bool, splitNode *innerNode, err error) { 365 nodeIdx, adjustedCharPos := n.locatePosition(charPos) 366 367 invalidateKeys, splitGroup, err := n.child.insertAtPosition(nodeIdx, adjustedCharPos, c) 368 if err != nil { 369 return false, nil, err 370 } 371 372 if invalidateKeys { 373 n.recalculateChildKeys() 374 } else { 375 key := &n.keys[nodeIdx] 376 key.numChars++ 377 if c == '\n' { 378 key.numNewlines++ 379 } 380 } 381 382 if splitGroup == nil { 383 return false, nil, nil 384 } 385 386 splitNode = &innerNode{child: splitGroup} 387 splitNode.recalculateChildKeys() 388 return true, splitNode, nil 389 } 390 391 func (n *innerNode) deleteAtPosition(charPos uint64) (didDelete, wasNewline bool, r rune) { 392 nodeIdx, adjustedCharPos := n.locatePosition(charPos) 393 didDelete, wasNewline, r = n.child.deleteAtPosition(nodeIdx, adjustedCharPos) 394 if didDelete { 395 n.keys[nodeIdx].numChars-- 396 if wasNewline { 397 n.keys[nodeIdx].numNewlines-- 398 } 399 } 400 return 401 } 402 403 func (n *innerNode) readerAtPosition(charPos uint64) Reader { 404 nodeIdx, adjustedCharPos := n.locatePosition(charPos) 405 return n.child.readerAtPosition(nodeIdx, adjustedCharPos) 406 } 407 408 func (n *innerNode) reverseReaderAtPosition(charPos uint64) ReverseReader { 409 nodeIdx, adjustedCharPos := n.locatePosition(charPos) 410 return n.child.reverseReaderAtPosition(nodeIdx, adjustedCharPos) 411 } 412 413 func (n *innerNode) positionAfterNewline(newlineIdx uint64) uint64 { 414 var charsBefore, newlinesBefore uint64 415 for i := uint64(0); i < n.numKeys-1; i++ { 416 numNewlines := n.keys[i].numNewlines 417 if newlineIdx < newlinesBefore+numNewlines { 418 return charsBefore + n.child.positionAfterNewline(i, newlineIdx-newlinesBefore) 419 } 420 newlinesBefore += numNewlines 421 charsBefore += n.keys[i].numChars 422 } 423 return charsBefore + n.child.positionAfterNewline(n.numKeys-1, newlineIdx-newlinesBefore) 424 } 425 426 func (n *innerNode) numNewlinesBeforePosition(charPos uint64) uint64 { 427 var charsBefore, newlinesBefore uint64 428 for i := uint64(0); i < n.numKeys-1; i++ { 429 numChars := n.keys[i].numChars 430 if charPos < charsBefore+numChars { 431 return newlinesBefore + n.child.numNewlinesBeforePosition(i, charPos-charsBefore) 432 } 433 charsBefore += numChars 434 newlinesBefore += n.keys[i].numNewlines 435 } 436 return newlinesBefore + n.child.numNewlinesBeforePosition(n.numKeys-1, charPos-charsBefore) 437 } 438 439 func (n *innerNode) locatePosition(charPos uint64) (nodeIdx, adjustedCharPos uint64) { 440 c := uint64(0) 441 for i := uint64(0); i < n.numKeys; i++ { 442 nc := n.keys[i].numChars 443 if charPos < c+nc { 444 return i, charPos - c 445 } 446 c += nc 447 } 448 return n.numKeys - 1, c 449 } 450 451 // leafNodeGroup is a group of leaf nodes referenced by an inner node. 452 // These form a doubly-linked list so a reader can scan the text efficiently. 453 type leafNodeGroup struct { 454 prev *leafNodeGroup 455 next *leafNodeGroup 456 numNodes uint64 457 nodes [maxNodesPerGroup]leafNode 458 } 459 460 func (g *leafNodeGroup) keys() []indexKey { 461 keys := make([]indexKey, g.numNodes) 462 for i := uint64(0); i < g.numNodes; i++ { 463 keys[i] = g.nodes[i].key() 464 } 465 return keys 466 } 467 468 func (g *leafNodeGroup) insertAtPosition(nodeIdx uint64, charPos uint64, c rune) (invalidateKeys bool, splitNodeGroup nodeGroup, err error) { 469 splitNode, err := g.nodes[nodeIdx].insertAtPosition(charPos, c) 470 if err != nil { 471 return false, nil, err 472 } 473 474 if splitNode == nil { 475 return false, nil, nil 476 } 477 478 splitNodeIdx := nodeIdx + 1 479 if g.numNodes < maxNodesPerGroup { 480 g.insertNode(splitNodeIdx, splitNode) 481 return true, nil, nil 482 } 483 484 splitGroup := g.split() 485 if splitNodeIdx < g.numNodes { 486 g.insertNode(splitNodeIdx, splitNode) 487 } else { 488 splitGroup.insertNode(splitNodeIdx-g.numNodes, splitNode) 489 } 490 return true, splitGroup, nil 491 } 492 493 func (g *leafNodeGroup) insertNode(nodeIdx uint64, node *leafNode) { 494 for i := int(g.numNodes); i > int(nodeIdx); i-- { 495 g.nodes[i] = g.nodes[i-1] 496 } 497 g.nodes[nodeIdx] = *node 498 g.numNodes++ 499 } 500 501 func (g *leafNodeGroup) split() *leafNodeGroup { 502 mid := g.numNodes / 2 503 splitGroup := &leafNodeGroup{numNodes: g.numNodes - mid} 504 for i := uint64(0); i < splitGroup.numNodes; i++ { 505 splitGroup.nodes[i] = g.nodes[mid+i] 506 } 507 g.numNodes = mid 508 if g.next != nil { 509 g.next.prev = splitGroup 510 splitGroup.next = g.next 511 } 512 splitGroup.prev = g 513 g.next = splitGroup 514 return splitGroup 515 } 516 517 func (g *leafNodeGroup) deleteAtPosition(nodeIdx uint64, charPos uint64) (didDelete, wasNewline bool, r rune) { 518 // Don't bother rebalancing the tree. This leaves extra space in the leaves, 519 // but that's okay because usually the user will want to insert more text anyway. 520 return g.nodes[nodeIdx].deleteAtPosition(charPos) 521 } 522 523 func (g *leafNodeGroup) readerAtPosition(nodeIdx uint64, charPos uint64) Reader { 524 textByteOffset := g.nodes[nodeIdx].byteOffsetForPosition(charPos) 525 return Reader{ 526 group: g, 527 nodeIdx: nodeIdx, 528 textByteOffset: textByteOffset, 529 } 530 } 531 532 func (g *leafNodeGroup) reverseReaderAtPosition(nodeIdx uint64, charPos uint64) ReverseReader { 533 textByteOffset := g.nodes[nodeIdx].byteOffsetForPosition(charPos) 534 return ReverseReader{ 535 Reader{ 536 group: g, 537 nodeIdx: nodeIdx, 538 textByteOffset: textByteOffset, 539 }, 540 } 541 } 542 543 func (g *leafNodeGroup) positionAfterNewline(nodeIdx uint64, newlineIdx uint64) uint64 { 544 return g.nodes[nodeIdx].positionAfterNewline(newlineIdx) 545 } 546 547 func (g *leafNodeGroup) numNewlinesBeforePosition(nodeIdx uint64, charPos uint64) uint64 { 548 return g.nodes[nodeIdx].numNewlinesBeforePosition(charPos) 549 } 550 551 // leafNode is a node that stores UTF-8 text as a byte array. 552 // 553 // Multi-byte UTF-8 characters are never split between leaf nodes. 554 // 555 // +---------------------------------+ 556 // | numBytes | textBytes[63] | 557 // +---------------------------------+ 558 // 559 // 1 + 63 = 64 bytes 560 type leafNode struct { 561 numBytes byte 562 textBytes [maxBytesPerLeaf]byte 563 } 564 565 func (l *leafNode) key() indexKey { 566 key := indexKey{} 567 for _, b := range l.textBytes[:l.numBytes] { 568 key.numChars += uint64(textUtf8.StartByteIndicator[b]) 569 if b == '\n' { 570 key.numNewlines++ 571 } 572 } 573 return key 574 } 575 576 func (l *leafNode) insertAtPosition(charPos uint64, c rune) (*leafNode, error) { 577 w := utf8.RuneLen(c) 578 if w < 0 { 579 return nil, ErrInvalidUtf8 580 } 581 582 charWidth := uint64(w) 583 584 if uint64(l.numBytes)+charWidth <= maxBytesPerLeaf { 585 l.insertAtPositionNoSplit(charPos, charWidth, c) 586 return nil, nil 587 } 588 589 splitNode, numCharsRemaining := l.split() 590 if charPos < numCharsRemaining { 591 l.insertAtPositionNoSplit(charPos, charWidth, c) 592 } else { 593 splitNode.insertAtPositionNoSplit(charPos-numCharsRemaining, charWidth, c) 594 } 595 596 return splitNode, nil 597 } 598 599 func (l *leafNode) insertAtPositionNoSplit(charPos uint64, charWidth uint64, c rune) { 600 offset := l.byteOffsetForPosition(charPos) 601 l.numBytes += byte(charWidth) 602 for i := int(l.numBytes) - 1; i >= int(offset+charWidth); i-- { 603 l.textBytes[i] = l.textBytes[i-int(charWidth)] 604 } 605 utf8.EncodeRune(l.textBytes[offset:], c) 606 } 607 608 func (l *leafNode) split() (*leafNode, uint64) { 609 splitIdx, numCharsBeforeSplit := l.splitIdx() 610 splitNode := leafNode{numBytes: l.numBytes - splitIdx} 611 for i := byte(0); i < splitNode.numBytes; i++ { 612 splitNode.textBytes[i] = l.textBytes[i+splitIdx] 613 } 614 l.numBytes = splitIdx 615 return &splitNode, uint64(numCharsBeforeSplit) 616 } 617 618 func (l *leafNode) splitIdx() (splitIdx, numCharsBeforeSplit byte) { 619 mid := l.numBytes / 2 620 for i := byte(0); i < l.numBytes; i++ { 621 b := l.textBytes[i] 622 isStartByte := textUtf8.StartByteIndicator[b] > 0 623 if i > mid && isStartByte { 624 return i, numCharsBeforeSplit 625 } else if isStartByte { 626 numCharsBeforeSplit++ 627 } 628 } 629 return l.numBytes, numCharsBeforeSplit 630 } 631 632 func (l *leafNode) deleteAtPosition(charPos uint64) (didDelete, wasNewline bool, r rune) { 633 offset := l.byteOffsetForPosition(charPos) 634 if offset < uint64(l.numBytes) { 635 startByte := l.textBytes[offset] 636 charWidth := textUtf8.CharWidth[startByte] 637 r, _ = utf8.DecodeRune(l.textBytes[offset : offset+uint64(charWidth)]) 638 for i := offset; i < uint64(l.numBytes-charWidth); i++ { 639 l.textBytes[i] = l.textBytes[i+uint64(charWidth)] 640 } 641 l.numBytes -= charWidth 642 didDelete = true 643 wasNewline = startByte == '\n' 644 } 645 return 646 } 647 648 func (l *leafNode) byteOffsetForPosition(charPos uint64) uint64 { 649 n := uint64(0) 650 for i, b := range l.textBytes[:l.numBytes] { 651 c := uint64(textUtf8.StartByteIndicator[b]) 652 if c > 0 && n == charPos { 653 return uint64(i) 654 } 655 n += c 656 } 657 return uint64(l.numBytes) 658 } 659 660 func (l *leafNode) positionAfterNewline(newlineIdx uint64) uint64 { 661 var newlineCount, pos uint64 662 for _, b := range l.textBytes[:l.numBytes] { 663 if b == '\n' { 664 if newlineIdx == newlineCount { 665 return pos + 1 666 } 667 newlineCount++ 668 } 669 if textUtf8.StartByteIndicator[b] > 0 { 670 pos++ 671 } 672 } 673 return pos 674 } 675 676 func (l *leafNode) numNewlinesBeforePosition(charPos uint64) uint64 { 677 var newlineCount, pos uint64 678 for _, b := range l.textBytes[:l.numBytes] { 679 if pos == charPos { 680 break 681 } 682 if b == '\n' { 683 newlineCount++ 684 } 685 if textUtf8.StartByteIndicator[b] > 0 { 686 pos++ 687 } 688 } 689 return newlineCount 690 }