github.com/aretext/aretext@v1.3.0/text/tree.go

github.com/aretext/aretext@v1.3.0/text/tree.go (about)

     1  package text
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"strings"
     7  	"unicode/utf8"
     8  
     9  	textUtf8 "github.com/aretext/aretext/text/utf8"
    10  )
    11  
    12  var (
    13  	ErrInvalidUtf8 = fmt.Errorf("invalid UTF-8")
    14  )
    15  
// Tree is a data structure for representing UTF-8 text.
// It supports efficient insertions, deletions, and lookup by character offset and line number.
// It is inspired by two papers:
// Boehm, H. J., Atkinson, R., & Plass, M. (1995). Ropes: an alternative to strings. Software: Practice and Experience, 25(12), 1315-1330.
// Rao, J., & Ross, K. A. (2000, May). Making B+-trees cache conscious in main memory. In Proceedings of the 2000 ACM SIGMOD international conference on Management of data (pp. 475-486).
// Like a rope, the tree maintains character counts at each level to efficiently locate a character at a given offset.
// To use the CPU cache efficiently, all children of a node are pre-allocated in a group (what the Rao & Ross paper calls a "full" cache-sensitive B+ tree),
// and the parent uses offsets within the node group to identify child nodes.
// All nodes are carefully designed to fit as much data as possible within a 64-byte cache line.
type Tree struct {
	// root is the top-level inner node; every query and edit starts from it.
	// It is replaced by a new, taller root when an insertion splits it.
	root *innerNode
}
    28  
    29  // NewTree returns a tree representing an empty string.
    30  func NewTree() *Tree {
    31  	root := &innerNode{numKeys: 1}
    32  	root.child = &leafNodeGroup{numNodes: 1}
    33  	return &Tree{root}
    34  }
    35  
    36  // NewTreeFromReader creates a new Tree from a reader that produces UTF-8 text.
    37  // This is more efficient than inserting the bytes into an empty tree.
    38  // Returns an error if the bytes are invalid UTF-8.
    39  func NewTreeFromReader(r io.Reader) (*Tree, error) {
    40  	leafGroups, err := bulkLoadIntoLeaves(r)
    41  	if err != nil {
    42  		return nil, err
    43  	}
    44  	root := buildTreeFromLeaves(leafGroups)
    45  	return &Tree{root}, nil
    46  }
    47  
    48  // NewTreeFromString creates a new Tree from a UTF-8 string.
    49  func NewTreeFromString(s string) (*Tree, error) {
    50  	reader := strings.NewReader(s)
    51  	return NewTreeFromReader(reader)
    52  }
    53  
    54  func bulkLoadIntoLeaves(r io.Reader) ([]nodeGroup, error) {
    55  	v := textUtf8.NewValidator()
    56  	leafGroups := make([]nodeGroup, 0, 1)
    57  	currentGroup := &leafNodeGroup{numNodes: 1}
    58  	currentNode := &currentGroup.nodes[0]
    59  	leafGroups = append(leafGroups, currentGroup)
    60  
    61  	var buf [1024]byte
    62  	for {
    63  		n, err := r.Read(buf[:])
    64  		if err != nil && err != io.EOF {
    65  			return nil, err
    66  		}
    67  
    68  		if n == 0 {
    69  			break
    70  		}
    71  
    72  		if !v.ValidateBytes(buf[:n]) {
    73  			return nil, ErrInvalidUtf8
    74  		}
    75  
    76  		for i := 0; i < n; i++ {
    77  			charWidth := textUtf8.CharWidth[buf[i]] // zero for continuation bytes
    78  			if currentNode.numBytes+charWidth >= maxBytesPerLeaf {
    79  				if currentGroup.numNodes < maxNodesPerGroup {
    80  					currentNode = &currentGroup.nodes[currentGroup.numNodes]
    81  					currentGroup.numNodes++
    82  				} else {
    83  					newGroup := &leafNodeGroup{numNodes: 1}
    84  					leafGroups = append(leafGroups, newGroup)
    85  					newGroup.prev = currentGroup
    86  					currentGroup.next = newGroup
    87  					currentGroup = newGroup
    88  					currentNode = &currentGroup.nodes[0]
    89  				}
    90  			}
    91  
    92  			currentNode.textBytes[currentNode.numBytes] = buf[i]
    93  			currentNode.numBytes++
    94  		}
    95  	}
    96  
    97  	if !v.ValidateEnd() {
    98  		return nil, ErrInvalidUtf8
    99  	}
   100  
   101  	return leafGroups, nil
   102  }
   103  
   104  func buildTreeFromLeaves(leafGroups []nodeGroup) *innerNode {
   105  	childGroups := leafGroups
   106  
   107  	for {
   108  		parentGroups := make([]nodeGroup, 0, len(childGroups)/maxNodesPerGroup+1)
   109  		currentGroup := &innerNodeGroup{}
   110  		parentGroups = append(parentGroups, currentGroup)
   111  
   112  		for _, cg := range childGroups {
   113  			if currentGroup.numNodes == maxNodesPerGroup {
   114  				newGroup := &innerNodeGroup{}
   115  				parentGroups = append(parentGroups, newGroup)
   116  				currentGroup = newGroup
   117  			}
   118  
   119  			innerNode := &currentGroup.nodes[currentGroup.numNodes]
   120  			innerNode.child = cg
   121  			innerNode.recalculateChildKeys()
   122  			currentGroup.numNodes++
   123  		}
   124  
   125  		if len(parentGroups) == 1 {
   126  			root := innerNode{child: parentGroups[0]}
   127  			root.recalculateChildKeys()
   128  			return &root
   129  		}
   130  
   131  		childGroups = parentGroups
   132  	}
   133  }
   134  
   135  // NumChars returns the total number of characters (runes) in the tree.
   136  func (t *Tree) NumChars() uint64 {
   137  	return t.root.numChars()
   138  }
   139  
   140  // NumLines returns the total number of lines in the tree.
   141  func (t *Tree) NumLines() uint64 {
   142  	return t.root.numNewlines() + 1
   143  }
   144  
   145  // InsertAtPosition inserts a UTF-8 character at the specified position (0-indexed).
   146  // If charPos is past the end of the text, it will be appended at the end.
   147  // Returns an error if c is not a valid UTF-8 character.
   148  func (t *Tree) InsertAtPosition(charPos uint64, c rune) error {
   149  	invalidateKeys, splitNode, err := t.root.insertAtPosition(charPos, c)
   150  	if err != nil {
   151  		return err
   152  	}
   153  
   154  	if invalidateKeys {
   155  		t.root.recalculateChildKeys()
   156  	}
   157  
   158  	if splitNode != nil {
   159  		newGroup := innerNodeGroup{numNodes: 2}
   160  		newGroup.nodes[0] = *t.root
   161  		newGroup.nodes[1] = *splitNode
   162  
   163  		t.root = &innerNode{child: &newGroup}
   164  		t.root.recalculateChildKeys()
   165  	}
   166  
   167  	return nil
   168  }
   169  
   170  // DeleteAtPosition removes the UTF-8 character at the specified position (0-indexed).
   171  // If charPos is past the end of the text, this has no effect.
   172  func (t *Tree) DeleteAtPosition(charPos uint64) (bool, rune) {
   173  	didDelete, _, r := t.root.deleteAtPosition(charPos)
   174  	return didDelete, r
   175  }
   176  
   177  // ReaderAtPosition returns a reader starting at the UTF-8 character at the specified position (0-indexed).
   178  // If the position is past the end of the text, the returned reader will read zero bytes.
   179  func (t *Tree) ReaderAtPosition(charPos uint64) Reader {
   180  	return t.root.readerAtPosition(charPos)
   181  }
   182  
   183  // ReverseReaderAtPosition returns a reverse reader starting at the specified position.
   184  func (t *Tree) ReverseReaderAtPosition(charPos uint64) ReverseReader {
   185  	return t.root.reverseReaderAtPosition(charPos)
   186  }
   187  
   188  // LineStartPosition returns the position of the first character at the specified line (0-indexed).
   189  // If the line number is greater than the maximum line number, returns one past the position of the last character.
   190  func (t *Tree) LineStartPosition(lineNum uint64) uint64 {
   191  	if lineNum == 0 {
   192  		// Special case the first line, since it's the only line that doesn't immediately follow a newline character.
   193  		return 0
   194  	}
   195  
   196  	return t.root.positionAfterNewline(lineNum - 1)
   197  }
   198  
   199  // LineNumForPosition returns the line number (0-indexed) for the line containing the specified position.
   200  func (t *Tree) LineNumForPosition(charPos uint64) uint64 {
   201  	return t.root.numNewlinesBeforePosition(charPos)
   202  }
   203  
   204  // String returns the text in the tree as a string.
   205  func (t *Tree) String() string {
   206  	reader := t.ReaderAtPosition(0)
   207  	retrievedBytes, err := io.ReadAll(&reader)
   208  	if err != nil {
   209  		panic("Unexpected error reading bytes from text.Tree")
   210  	}
   211  	return string(retrievedBytes)
   212  }
   213  
   214  const maxKeysPerNode = 64
   215  const maxNodesPerGroup = maxKeysPerNode
   216  const maxBytesPerLeaf = 63
   217  
   218  // nodeGroup is either an inner node group or a leaf node group.
   219  type nodeGroup interface {
   220  	keys() []indexKey
   221  	insertAtPosition(nodeIdx uint64, charPos uint64, c rune) (invalidateKeys bool, splitNodeGroup nodeGroup, err error)
   222  	deleteAtPosition(nodeIdx uint64, charPos uint64) (didDelete, wasNewline bool, r rune)
   223  	readerAtPosition(nodeIdx uint64, charPos uint64) Reader
   224  	reverseReaderAtPosition(nodeIdx uint64, charPos uint64) ReverseReader
   225  	positionAfterNewline(nodeIdx uint64, newlineIdx uint64) uint64
   226  	numNewlinesBeforePosition(nodeIdx uint64, charPos uint64) uint64
   227  }
   228  
// indexKey is used to navigate from an inner node to the child node containing a particular line or character offset.
// A parent stores one indexKey per node of its child group, summarizing that node's subtree.
type indexKey struct {

	// Number of UTF-8 characters in a subtree.
	numChars uint64

	// Number of newline characters in a subtree.
	numNewlines uint64
}
   238  
// innerNodeGroup is a group of inner nodes referenced by a parent inner node.
type innerNodeGroup struct {
	// numNodes is the number of entries of nodes that are in use.
	numNodes uint64
	// nodes holds the inner nodes in text order; only the first numNodes entries are meaningful.
	nodes [maxNodesPerGroup]innerNode
}
   244  
   245  func (g *innerNodeGroup) keys() []indexKey {
   246  	keys := make([]indexKey, g.numNodes)
   247  	for i := uint64(0); i < g.numNodes; i++ {
   248  		keys[i] = g.nodes[i].key()
   249  	}
   250  	return keys
   251  }
   252  
   253  func (g *innerNodeGroup) insertAtPosition(nodeIdx uint64, charPos uint64, c rune) (invalidateKeys bool, splitNodeGroup nodeGroup, err error) {
   254  	_, splitNode, err := g.nodes[nodeIdx].insertAtPosition(charPos, c)
   255  	if err != nil {
   256  		return false, nil, err
   257  	}
   258  
   259  	if splitNode == nil {
   260  		return false, nil, nil
   261  	}
   262  
   263  	splitIdx := nodeIdx + 1
   264  	if g.numNodes < maxNodesPerGroup {
   265  		g.insertNode(splitIdx, splitNode)
   266  		return true, nil, nil
   267  	}
   268  
   269  	splitGroup := g.split()
   270  	if splitIdx < g.numNodes {
   271  		g.insertNode(splitIdx, splitNode)
   272  	} else {
   273  		splitGroup.insertNode(splitIdx-g.numNodes, splitNode)
   274  	}
   275  
   276  	return true, splitGroup, nil
   277  }
   278  
   279  func (g *innerNodeGroup) insertNode(nodeIdx uint64, node *innerNode) {
   280  	for i := int(g.numNodes); i > int(nodeIdx); i-- {
   281  		g.nodes[i] = g.nodes[i-1]
   282  	}
   283  	g.nodes[nodeIdx] = *node
   284  	g.numNodes++
   285  }
   286  
   287  func (g *innerNodeGroup) split() *innerNodeGroup {
   288  	mid := g.numNodes / 2
   289  	splitGroup := innerNodeGroup{numNodes: g.numNodes - mid}
   290  	for i := uint64(0); i < splitGroup.numNodes; i++ {
   291  		splitGroup.nodes[i] = g.nodes[mid+i]
   292  	}
   293  	g.numNodes = mid
   294  	return &splitGroup
   295  }
   296  
   297  func (g *innerNodeGroup) deleteAtPosition(nodeIdx uint64, charPos uint64) (didDelete, wasNewline bool, r rune) {
   298  	return g.nodes[nodeIdx].deleteAtPosition(charPos)
   299  }
   300  
   301  func (g *innerNodeGroup) readerAtPosition(nodeIdx uint64, charPos uint64) Reader {
   302  	return g.nodes[nodeIdx].readerAtPosition(charPos)
   303  }
   304  
   305  func (g *innerNodeGroup) reverseReaderAtPosition(nodeIdx uint64, charPos uint64) ReverseReader {
   306  	return g.nodes[nodeIdx].reverseReaderAtPosition(charPos)
   307  }
   308  
   309  func (g *innerNodeGroup) positionAfterNewline(nodeIdx uint64, newlineIdx uint64) uint64 {
   310  	return g.nodes[nodeIdx].positionAfterNewline(newlineIdx)
   311  }
   312  
   313  func (g *innerNodeGroup) numNewlinesBeforePosition(nodeIdx uint64, charPos uint64) uint64 {
   314  	return g.nodes[nodeIdx].numNewlinesBeforePosition(charPos)
   315  }
   316  
// innerNode is used to navigate to the leaf node containing a character offset or line number.
//
// +-----------------------------+
// | child | numKeys |  keys[64] |
// +-----------------------------+
//
//	16 + 8 + 1024 = 1048 bytes
type innerNode struct {
	// child is the group of nodes (inner nodes or leaves) directly below this node.
	child nodeGroup
	// numKeys is the number of entries of keys that are in use.
	numKeys uint64

	// Each key corresponds to a node in the child group.
	// The keys are a cached summary of the child nodes; they are adjusted in place on
	// simple edits and rebuilt by recalculateChildKeys when the child group's layout changes.
	keys [maxKeysPerNode]indexKey
}
   331  
   332  func (n *innerNode) key() indexKey {
   333  	nodeKey := indexKey{}
   334  	for i := uint64(0); i < n.numKeys; i++ {
   335  		key := n.keys[i]
   336  		nodeKey.numChars += key.numChars
   337  		nodeKey.numNewlines += key.numNewlines
   338  	}
   339  	return nodeKey
   340  }
   341  
   342  func (n *innerNode) numChars() uint64 {
   343  	numChars := uint64(0)
   344  	for i := uint64(0); i < n.numKeys; i++ {
   345  		numChars += n.keys[i].numChars
   346  	}
   347  	return numChars
   348  }
   349  
   350  func (n *innerNode) numNewlines() uint64 {
   351  	numNewlines := uint64(0)
   352  	for i := uint64(0); i < n.numKeys; i++ {
   353  		numNewlines += n.keys[i].numNewlines
   354  	}
   355  	return numNewlines
   356  }
   357  
   358  func (n *innerNode) recalculateChildKeys() {
   359  	childKeys := n.child.keys()
   360  	copy(n.keys[:], childKeys)
   361  	n.numKeys = uint64(len(childKeys))
   362  }
   363  
   364  func (n *innerNode) insertAtPosition(charPos uint64, c rune) (invalidateKeys bool, splitNode *innerNode, err error) {
   365  	nodeIdx, adjustedCharPos := n.locatePosition(charPos)
   366  
   367  	invalidateKeys, splitGroup, err := n.child.insertAtPosition(nodeIdx, adjustedCharPos, c)
   368  	if err != nil {
   369  		return false, nil, err
   370  	}
   371  
   372  	if invalidateKeys {
   373  		n.recalculateChildKeys()
   374  	} else {
   375  		key := &n.keys[nodeIdx]
   376  		key.numChars++
   377  		if c == '\n' {
   378  			key.numNewlines++
   379  		}
   380  	}
   381  
   382  	if splitGroup == nil {
   383  		return false, nil, nil
   384  	}
   385  
   386  	splitNode = &innerNode{child: splitGroup}
   387  	splitNode.recalculateChildKeys()
   388  	return true, splitNode, nil
   389  }
   390  
   391  func (n *innerNode) deleteAtPosition(charPos uint64) (didDelete, wasNewline bool, r rune) {
   392  	nodeIdx, adjustedCharPos := n.locatePosition(charPos)
   393  	didDelete, wasNewline, r = n.child.deleteAtPosition(nodeIdx, adjustedCharPos)
   394  	if didDelete {
   395  		n.keys[nodeIdx].numChars--
   396  		if wasNewline {
   397  			n.keys[nodeIdx].numNewlines--
   398  		}
   399  	}
   400  	return
   401  }
   402  
   403  func (n *innerNode) readerAtPosition(charPos uint64) Reader {
   404  	nodeIdx, adjustedCharPos := n.locatePosition(charPos)
   405  	return n.child.readerAtPosition(nodeIdx, adjustedCharPos)
   406  }
   407  
   408  func (n *innerNode) reverseReaderAtPosition(charPos uint64) ReverseReader {
   409  	nodeIdx, adjustedCharPos := n.locatePosition(charPos)
   410  	return n.child.reverseReaderAtPosition(nodeIdx, adjustedCharPos)
   411  }
   412  
   413  func (n *innerNode) positionAfterNewline(newlineIdx uint64) uint64 {
   414  	var charsBefore, newlinesBefore uint64
   415  	for i := uint64(0); i < n.numKeys-1; i++ {
   416  		numNewlines := n.keys[i].numNewlines
   417  		if newlineIdx < newlinesBefore+numNewlines {
   418  			return charsBefore + n.child.positionAfterNewline(i, newlineIdx-newlinesBefore)
   419  		}
   420  		newlinesBefore += numNewlines
   421  		charsBefore += n.keys[i].numChars
   422  	}
   423  	return charsBefore + n.child.positionAfterNewline(n.numKeys-1, newlineIdx-newlinesBefore)
   424  }
   425  
   426  func (n *innerNode) numNewlinesBeforePosition(charPos uint64) uint64 {
   427  	var charsBefore, newlinesBefore uint64
   428  	for i := uint64(0); i < n.numKeys-1; i++ {
   429  		numChars := n.keys[i].numChars
   430  		if charPos < charsBefore+numChars {
   431  			return newlinesBefore + n.child.numNewlinesBeforePosition(i, charPos-charsBefore)
   432  		}
   433  		charsBefore += numChars
   434  		newlinesBefore += n.keys[i].numNewlines
   435  	}
   436  	return newlinesBefore + n.child.numNewlinesBeforePosition(n.numKeys-1, charPos-charsBefore)
   437  }
   438  
   439  func (n *innerNode) locatePosition(charPos uint64) (nodeIdx, adjustedCharPos uint64) {
   440  	c := uint64(0)
   441  	for i := uint64(0); i < n.numKeys; i++ {
   442  		nc := n.keys[i].numChars
   443  		if charPos < c+nc {
   444  			return i, charPos - c
   445  		}
   446  		c += nc
   447  	}
   448  	return n.numKeys - 1, c
   449  }
   450  
// leafNodeGroup is a group of leaf nodes referenced by an inner node.
// These form a doubly-linked list so a reader can scan the text efficiently.
type leafNodeGroup struct {
	// prev is the group holding the text immediately before this group, or nil for the first group.
	prev *leafNodeGroup
	// next is the group holding the text immediately after this group, or nil for the last group.
	next *leafNodeGroup
	// numNodes is the number of entries of nodes that are in use.
	numNodes uint64
	// nodes holds the leaves in text order; only the first numNodes entries are meaningful.
	nodes [maxNodesPerGroup]leafNode
}
   459  
   460  func (g *leafNodeGroup) keys() []indexKey {
   461  	keys := make([]indexKey, g.numNodes)
   462  	for i := uint64(0); i < g.numNodes; i++ {
   463  		keys[i] = g.nodes[i].key()
   464  	}
   465  	return keys
   466  }
   467  
   468  func (g *leafNodeGroup) insertAtPosition(nodeIdx uint64, charPos uint64, c rune) (invalidateKeys bool, splitNodeGroup nodeGroup, err error) {
   469  	splitNode, err := g.nodes[nodeIdx].insertAtPosition(charPos, c)
   470  	if err != nil {
   471  		return false, nil, err
   472  	}
   473  
   474  	if splitNode == nil {
   475  		return false, nil, nil
   476  	}
   477  
   478  	splitNodeIdx := nodeIdx + 1
   479  	if g.numNodes < maxNodesPerGroup {
   480  		g.insertNode(splitNodeIdx, splitNode)
   481  		return true, nil, nil
   482  	}
   483  
   484  	splitGroup := g.split()
   485  	if splitNodeIdx < g.numNodes {
   486  		g.insertNode(splitNodeIdx, splitNode)
   487  	} else {
   488  		splitGroup.insertNode(splitNodeIdx-g.numNodes, splitNode)
   489  	}
   490  	return true, splitGroup, nil
   491  }
   492  
   493  func (g *leafNodeGroup) insertNode(nodeIdx uint64, node *leafNode) {
   494  	for i := int(g.numNodes); i > int(nodeIdx); i-- {
   495  		g.nodes[i] = g.nodes[i-1]
   496  	}
   497  	g.nodes[nodeIdx] = *node
   498  	g.numNodes++
   499  }
   500  
   501  func (g *leafNodeGroup) split() *leafNodeGroup {
   502  	mid := g.numNodes / 2
   503  	splitGroup := &leafNodeGroup{numNodes: g.numNodes - mid}
   504  	for i := uint64(0); i < splitGroup.numNodes; i++ {
   505  		splitGroup.nodes[i] = g.nodes[mid+i]
   506  	}
   507  	g.numNodes = mid
   508  	if g.next != nil {
   509  		g.next.prev = splitGroup
   510  		splitGroup.next = g.next
   511  	}
   512  	splitGroup.prev = g
   513  	g.next = splitGroup
   514  	return splitGroup
   515  }
   516  
   517  func (g *leafNodeGroup) deleteAtPosition(nodeIdx uint64, charPos uint64) (didDelete, wasNewline bool, r rune) {
   518  	// Don't bother rebalancing the tree.  This leaves extra space in the leaves,
   519  	// but that's okay because usually the user will want to insert more text anyway.
   520  	return g.nodes[nodeIdx].deleteAtPosition(charPos)
   521  }
   522  
   523  func (g *leafNodeGroup) readerAtPosition(nodeIdx uint64, charPos uint64) Reader {
   524  	textByteOffset := g.nodes[nodeIdx].byteOffsetForPosition(charPos)
   525  	return Reader{
   526  		group:          g,
   527  		nodeIdx:        nodeIdx,
   528  		textByteOffset: textByteOffset,
   529  	}
   530  }
   531  
   532  func (g *leafNodeGroup) reverseReaderAtPosition(nodeIdx uint64, charPos uint64) ReverseReader {
   533  	textByteOffset := g.nodes[nodeIdx].byteOffsetForPosition(charPos)
   534  	return ReverseReader{
   535  		Reader{
   536  			group:          g,
   537  			nodeIdx:        nodeIdx,
   538  			textByteOffset: textByteOffset,
   539  		},
   540  	}
   541  }
   542  
   543  func (g *leafNodeGroup) positionAfterNewline(nodeIdx uint64, newlineIdx uint64) uint64 {
   544  	return g.nodes[nodeIdx].positionAfterNewline(newlineIdx)
   545  }
   546  
   547  func (g *leafNodeGroup) numNewlinesBeforePosition(nodeIdx uint64, charPos uint64) uint64 {
   548  	return g.nodes[nodeIdx].numNewlinesBeforePosition(charPos)
   549  }
   550  
// leafNode is a node that stores UTF-8 text as a byte array.
//
// Multi-byte UTF-8 characters are never split between leaf nodes.
//
// +---------------------------------+
// |   numBytes  |   textBytes[63]   |
// +---------------------------------+
//
//	1 + 63 = 64 bytes
type leafNode struct {
	// numBytes is the number of bytes of textBytes that are in use.
	numBytes byte
	// textBytes holds the UTF-8 encoded text; only the first numBytes bytes are meaningful.
	textBytes [maxBytesPerLeaf]byte
}
   564  
   565  func (l *leafNode) key() indexKey {
   566  	key := indexKey{}
   567  	for _, b := range l.textBytes[:l.numBytes] {
   568  		key.numChars += uint64(textUtf8.StartByteIndicator[b])
   569  		if b == '\n' {
   570  			key.numNewlines++
   571  		}
   572  	}
   573  	return key
   574  }
   575  
   576  func (l *leafNode) insertAtPosition(charPos uint64, c rune) (*leafNode, error) {
   577  	w := utf8.RuneLen(c)
   578  	if w < 0 {
   579  		return nil, ErrInvalidUtf8
   580  	}
   581  
   582  	charWidth := uint64(w)
   583  
   584  	if uint64(l.numBytes)+charWidth <= maxBytesPerLeaf {
   585  		l.insertAtPositionNoSplit(charPos, charWidth, c)
   586  		return nil, nil
   587  	}
   588  
   589  	splitNode, numCharsRemaining := l.split()
   590  	if charPos < numCharsRemaining {
   591  		l.insertAtPositionNoSplit(charPos, charWidth, c)
   592  	} else {
   593  		splitNode.insertAtPositionNoSplit(charPos-numCharsRemaining, charWidth, c)
   594  	}
   595  
   596  	return splitNode, nil
   597  }
   598  
   599  func (l *leafNode) insertAtPositionNoSplit(charPos uint64, charWidth uint64, c rune) {
   600  	offset := l.byteOffsetForPosition(charPos)
   601  	l.numBytes += byte(charWidth)
   602  	for i := int(l.numBytes) - 1; i >= int(offset+charWidth); i-- {
   603  		l.textBytes[i] = l.textBytes[i-int(charWidth)]
   604  	}
   605  	utf8.EncodeRune(l.textBytes[offset:], c)
   606  }
   607  
   608  func (l *leafNode) split() (*leafNode, uint64) {
   609  	splitIdx, numCharsBeforeSplit := l.splitIdx()
   610  	splitNode := leafNode{numBytes: l.numBytes - splitIdx}
   611  	for i := byte(0); i < splitNode.numBytes; i++ {
   612  		splitNode.textBytes[i] = l.textBytes[i+splitIdx]
   613  	}
   614  	l.numBytes = splitIdx
   615  	return &splitNode, uint64(numCharsBeforeSplit)
   616  }
   617  
   618  func (l *leafNode) splitIdx() (splitIdx, numCharsBeforeSplit byte) {
   619  	mid := l.numBytes / 2
   620  	for i := byte(0); i < l.numBytes; i++ {
   621  		b := l.textBytes[i]
   622  		isStartByte := textUtf8.StartByteIndicator[b] > 0
   623  		if i > mid && isStartByte {
   624  			return i, numCharsBeforeSplit
   625  		} else if isStartByte {
   626  			numCharsBeforeSplit++
   627  		}
   628  	}
   629  	return l.numBytes, numCharsBeforeSplit
   630  }
   631  
   632  func (l *leafNode) deleteAtPosition(charPos uint64) (didDelete, wasNewline bool, r rune) {
   633  	offset := l.byteOffsetForPosition(charPos)
   634  	if offset < uint64(l.numBytes) {
   635  		startByte := l.textBytes[offset]
   636  		charWidth := textUtf8.CharWidth[startByte]
   637  		r, _ = utf8.DecodeRune(l.textBytes[offset : offset+uint64(charWidth)])
   638  		for i := offset; i < uint64(l.numBytes-charWidth); i++ {
   639  			l.textBytes[i] = l.textBytes[i+uint64(charWidth)]
   640  		}
   641  		l.numBytes -= charWidth
   642  		didDelete = true
   643  		wasNewline = startByte == '\n'
   644  	}
   645  	return
   646  }
   647  
   648  func (l *leafNode) byteOffsetForPosition(charPos uint64) uint64 {
   649  	n := uint64(0)
   650  	for i, b := range l.textBytes[:l.numBytes] {
   651  		c := uint64(textUtf8.StartByteIndicator[b])
   652  		if c > 0 && n == charPos {
   653  			return uint64(i)
   654  		}
   655  		n += c
   656  	}
   657  	return uint64(l.numBytes)
   658  }
   659  
   660  func (l *leafNode) positionAfterNewline(newlineIdx uint64) uint64 {
   661  	var newlineCount, pos uint64
   662  	for _, b := range l.textBytes[:l.numBytes] {
   663  		if b == '\n' {
   664  			if newlineIdx == newlineCount {
   665  				return pos + 1
   666  			}
   667  			newlineCount++
   668  		}
   669  		if textUtf8.StartByteIndicator[b] > 0 {
   670  			pos++
   671  		}
   672  	}
   673  	return pos
   674  }
   675  
   676  func (l *leafNode) numNewlinesBeforePosition(charPos uint64) uint64 {
   677  	var newlineCount, pos uint64
   678  	for _, b := range l.textBytes[:l.numBytes] {
   679  		if pos == charPos {
   680  			break
   681  		}
   682  		if b == '\n' {
   683  			newlineCount++
   684  		}
   685  		if textUtf8.StartByteIndicator[b] > 0 {
   686  			pos++
   687  		}
   688  	}
   689  	return newlineCount
   690  }