kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/span/span.go (about)

     1  /*
     2   * Copyright 2018 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package span implements utilities to resolve byte offsets within a file to
    18  // line and column numbers.
    19  package span // import "kythe.io/kythe/go/util/span"
    20  
    21  import (
    22  	"bytes"
    23  	"errors"
    24  	"fmt"
    25  	"sort"
    26  
    27  	"kythe.io/kythe/go/util/log"
    28  
    29  	"github.com/sergi/go-diff/diffmatchpatch"
    30  	"google.golang.org/protobuf/proto"
    31  
    32  	cpb "kythe.io/kythe/proto/common_go_proto"
    33  	srvpb "kythe.io/kythe/proto/serving_go_proto"
    34  	xpb "kythe.io/kythe/proto/xref_go_proto"
    35  )
    36  
    37  // InBounds reports whether [start,end) is bounded by the specified [startBoundary,endBoundary) span.
    38  func InBounds(kind xpb.DecorationsRequest_SpanKind, start, end, startBoundary, endBoundary int32) bool {
    39  	switch kind {
    40  	case xpb.DecorationsRequest_WITHIN_SPAN:
    41  		return start >= startBoundary && end <= endBoundary
    42  	case xpb.DecorationsRequest_AROUND_SPAN:
    43  		return start <= startBoundary && end >= endBoundary
    44  	default:
    45  		log.Warningf("unknown DecorationsRequest_SpanKind: %v", kind)
    46  	}
    47  	return false
    48  }
    49  
// Patcher uses a computed diff between two texts to map spans from the original
// text to the new text.
type Patcher struct {
	// spans holds the diff regions in order.  Each entry records its length,
	// its newline statistics, and the accumulated old/new text positions of
	// everything that precedes it.
	spans []diff
}
    55  
    56  // NewPatcher returns a Patcher based on the diff between oldText and newText.
    57  func NewPatcher(oldText, newText []byte) (p *Patcher, err error) {
    58  	defer func() {
    59  		// dmp may panic on some large requests; catch it and return an error instead
    60  		if r := recover(); r != nil {
    61  			err = fmt.Errorf("diffmatchpatch panic: %v", r)
    62  		}
    63  	}()
    64  	dmp := diffmatchpatch.New()
    65  	diff := dmp.DiffCleanupEfficiency(dmp.DiffMain(string(oldText), string(newText), false))
    66  	return &Patcher{mapToOffsets(diff)}, nil
    67  }
    68  
    69  // Marshal encodes the Patcher into a packed binary format.
    70  func (p *Patcher) Marshal() ([]byte, error) {
    71  	db := &srvpb.Diff{
    72  		SpanLength:       make([]int32, len(p.spans)),
    73  		SpanType:         make([]srvpb.Diff_Type, len(p.spans)),
    74  		SpanNewlines:     make([]int32, len(p.spans)),
    75  		SpanFirstNewline: make([]int32, len(p.spans)),
    76  		SpanLastNewline:  make([]int32, len(p.spans)),
    77  	}
    78  	for i, d := range p.spans {
    79  		db.SpanLength[i] = d.Length
    80  		db.SpanNewlines[i] = d.Newlines
    81  		db.SpanFirstNewline[i] = d.FirstNewline
    82  		db.SpanLastNewline[i] = d.LastNewline
    83  		switch d.Type {
    84  		case eq:
    85  			db.SpanType[i] = srvpb.Diff_EQUAL
    86  		case ins:
    87  			db.SpanType[i] = srvpb.Diff_INSERT
    88  		case del:
    89  			db.SpanType[i] = srvpb.Diff_DELETE
    90  		default:
    91  			return nil, fmt.Errorf("unknown diff type: %s", d.Type)
    92  		}
    93  	}
    94  	return proto.Marshal(db)
    95  }
    96  
    97  // Unmarshal decodes a Patcher from its packed binary format.
    98  func Unmarshal(rec []byte) (*Patcher, error) {
    99  	var db srvpb.Diff
   100  	if err := proto.Unmarshal(rec, &db); err != nil {
   101  		return nil, err
   102  	}
   103  	if len(db.SpanLength) != len(db.SpanType) {
   104  		return nil, fmt.Errorf("length of span_length does not match length of span_type: %d vs %d", len(db.SpanLength), len(db.SpanType))
   105  	} else if len(db.SpanLength) != len(db.SpanNewlines) {
   106  		return nil, fmt.Errorf("length of span_length does not match length of span_newlines: %d vs %d", len(db.SpanLength), len(db.SpanNewlines))
   107  	} else if len(db.SpanLength) != len(db.SpanFirstNewline) {
   108  		return nil, fmt.Errorf("length of span_length does not match length of span_first_newline: %d vs %d", len(db.SpanLength), len(db.SpanFirstNewline))
   109  	} else if len(db.SpanLength) != len(db.SpanLastNewline) {
   110  		return nil, fmt.Errorf("length of span_length does not match length of span_last_newline: %d vs %d", len(db.SpanLength), len(db.SpanLastNewline))
   111  	}
   112  	spans := make([]diff, len(db.SpanLength))
   113  	for i, l := range db.SpanLength {
   114  		spans[i] = diff{
   115  			Length:       l,
   116  			Newlines:     db.SpanNewlines[i],
   117  			FirstNewline: db.SpanFirstNewline[i],
   118  			LastNewline:  db.SpanLastNewline[i],
   119  		}
   120  		switch db.SpanType[i] {
   121  		case srvpb.Diff_EQUAL:
   122  			spans[i].Type = eq
   123  		case srvpb.Diff_INSERT:
   124  			spans[i].Type = ins
   125  		case srvpb.Diff_DELETE:
   126  			spans[i].Type = del
   127  		default:
   128  			return nil, fmt.Errorf("unknown diff type: %s", db.SpanType[i])
   129  		}
   130  		if i != 0 {
   131  			updatePrefix(&spans[i-1], &spans[i])
   132  		}
   133  	}
   134  	return &Patcher{spans}, nil
   135  }
   136  
   137  func updatePrefix(prev, d *diff) {
   138  	d.oldPrefix = prev.oldPrefix
   139  	d.newPrefix = prev.newPrefix
   140  	d.oldPrefix.Type = del
   141  	d.newPrefix.Type = ins
   142  	d.oldPrefix.Update(*prev)
   143  	d.newPrefix.Update(*prev)
   144  }
   145  
// diff describes one region of a computed diff without retaining its text.
type diff struct {
	// Length is the byte length of the region's text.
	Length int32
	// Type is the diff operation of the region (eq, ins, or del).
	Type diffmatchpatch.Operation

	// Newlines is the number of '\n' bytes in the region's text.
	Newlines int32
	// FirstNewline and LastNewline are the byte indices, relative to the start
	// of the region, of its first and last '\n'; both are -1 when the region
	// contains no newline.
	FirstNewline int32
	LastNewline  int32

	// oldPrefix and newPrefix track the position reached in the old and new
	// texts, respectively, by all regions preceding this one.
	oldPrefix, newPrefix offsetTracker
}
   156  
// Short aliases for the diffmatchpatch operations used throughout this file.
const (
	eq  = diffmatchpatch.DiffEqual
	ins = diffmatchpatch.DiffInsert
	del = diffmatchpatch.DiffDelete
)
   162  
   163  func mapToOffsets(ds []diffmatchpatch.Diff) []diff {
   164  	res := make([]diff, len(ds))
   165  	for i, d := range ds {
   166  		l := len(d.Text)
   167  		var newlines int
   168  		var first, last int = -1, -1
   169  		for j := 0; j < l; j++ {
   170  			if d.Text[j] != '\n' {
   171  				continue
   172  			}
   173  			newlines++
   174  			if first == -1 {
   175  				first = j
   176  			}
   177  			last = j
   178  		}
   179  		res[i] = diff{
   180  			Length:       int32(l),
   181  			Type:         d.Type,
   182  			Newlines:     int32(newlines),
   183  			FirstNewline: int32(first),
   184  			LastNewline:  int32(last),
   185  		}
   186  		if i != 0 {
   187  			updatePrefix(&res[i-1], &res[i])
   188  		}
   189  	}
   190  	return res
   191  }
   192  
// offsetTracker accumulates the position reached in one side of a diff after
// a sequence of diff regions has been fed to Update.
type offsetTracker struct {
	// Type selects which non-equal regions advance the tracker: Update skips
	// any region that is neither eq nor of this Type.
	Type diffmatchpatch.Operation

	Offset       int32 // total bytes counted so far
	Lines        int32 // total newlines counted so far
	ColumnOffset int32 // bytes counted since the last counted newline
}
   200  
   201  func (t *offsetTracker) Update(d diff) {
   202  	if d.Type != eq && d.Type != t.Type {
   203  		return
   204  	}
   205  	t.Offset += d.Length
   206  	t.Lines += d.Newlines
   207  	if d.LastNewline == -1 {
   208  		t.ColumnOffset += d.Length
   209  	} else {
   210  		t.ColumnOffset = d.Length - d.LastNewline - 1
   211  	}
   212  }
   213  
   214  // PatchSpan returns the resulting Span of mapping the given Span from the
   215  // Patcher's constructed oldText to its newText.  If the span no longer exists
   216  // in newText or is invalid, the returned bool will be false.  As a convenience,
   217  // if p==nil, the original span will be returned.
   218  func (p *Patcher) PatchSpan(s *cpb.Span) (span *cpb.Span, exists bool) {
   219  	spanStart, spanEnd := ByteOffsets(s)
   220  	if spanStart > spanEnd {
   221  		return nil, false
   222  	} else if p == nil || s == nil {
   223  		return s, true
   224  	}
   225  
   226  	// Find the diff span that contains the starting offset.
   227  	idx := sort.Search(len(p.spans), func(i int) bool {
   228  		return spanStart < p.spans[i].oldPrefix.Offset
   229  	}) - 1
   230  	if idx < 0 {
   231  		return nil, false
   232  	}
   233  
   234  	d := p.spans[idx]
   235  	if d.Type != eq || spanEnd > d.oldPrefix.Offset+d.Length {
   236  		return nil, false
   237  	}
   238  
   239  	lineDiff := d.newPrefix.Lines - d.oldPrefix.Lines
   240  	colDiff := d.newPrefix.ColumnOffset - d.oldPrefix.ColumnOffset
   241  	if d.FirstNewline != -1 && spanStart-d.oldPrefix.Offset >= d.FirstNewline {
   242  		// The given span is past the first newline so it has no column diff.
   243  		colDiff = 0
   244  	}
   245  	return &cpb.Span{
   246  		Start: &cpb.Point{
   247  			ByteOffset:   d.newPrefix.Offset + (spanStart - d.oldPrefix.Offset),
   248  			ColumnOffset: s.GetStart().GetColumnOffset() + colDiff,
   249  			LineNumber:   s.GetStart().GetLineNumber() + lineDiff,
   250  		},
   251  		End: &cpb.Point{
   252  			ByteOffset:   d.newPrefix.Offset + (spanEnd - d.oldPrefix.Offset),
   253  			ColumnOffset: s.GetEnd().GetColumnOffset() + colDiff,
   254  			LineNumber:   s.GetEnd().GetLineNumber() + lineDiff,
   255  		},
   256  	}, true
   257  }
   258  
   259  // ByteOffsets returns the starting and ending byte offsets of the Span.
   260  func ByteOffsets(s *cpb.Span) (int32, int32) {
   261  	return s.GetStart().GetByteOffset(), s.GetEnd().GetByteOffset()
   262  }
   263  
   264  // Patch returns the resulting span of mapping the given span from the Patcher's
   265  // constructed oldText to its newText.  If the span no longer exists in newText
   266  // or is invalid, the returned bool will be false.  As a convenience, if p==nil,
   267  // the original span will be returned.
   268  func (p *Patcher) Patch(spanStart, spanEnd int32) (newStart, newEnd int32, exists bool) {
   269  	if spanStart > spanEnd {
   270  		return 0, 0, false
   271  	} else if p == nil {
   272  		return spanStart, spanEnd, true
   273  	}
   274  
   275  	if spanStart == spanEnd {
   276  		// Give zero-width span a positive length for the below algorithm; then fix
   277  		// the length on return.
   278  		spanEnd++
   279  		defer func() { newEnd = newStart }()
   280  	}
   281  
   282  	var old, new int32
   283  	for _, d := range p.spans {
   284  		l := d.Length
   285  		if old > spanStart {
   286  			return 0, 0, false
   287  		}
   288  		switch d.Type {
   289  		case eq:
   290  			if old <= spanStart && spanEnd <= old+l {
   291  				newStart = new + (spanStart - old)
   292  				newEnd = new + (spanEnd - old)
   293  				exists = true
   294  				return
   295  			}
   296  			old += l
   297  			new += l
   298  		case del:
   299  			old += l
   300  		case ins:
   301  			new += l
   302  		}
   303  	}
   304  
   305  	return 0, 0, false
   306  }
   307  
// Normalizer fixes xref.Locations within a given source text so that each point
// has consistent byte_offset, line_number, and column_offset fields within the
// range of text's length and its line lengths.
type Normalizer struct {
	// textLen is the byte length of the whole text.
	textLen int32
	// lineLen[i] is the byte length of the i-th line (0-based) plus the length
	// of a line terminator; NewNormalizer adds the terminator length for the
	// final line as well.
	lineLen []int32
	// prefixLen[i] is the byte offset at which the i-th line (0-based) begins.
	prefixLen []int32
}
   316  
   317  // NewNormalizer returns a Normalizer for Locations within text.
   318  func NewNormalizer(text []byte) *Normalizer {
   319  	lines := bytes.Split(text, lineEnd)
   320  	lineLen := make([]int32, len(lines))
   321  	prefixLen := make([]int32, len(lines))
   322  	for i := 1; i < len(lines); i++ {
   323  		lineLen[i-1] = int32(len(lines[i-1]) + len(lineEnd))
   324  		prefixLen[i] = prefixLen[i-1] + lineLen[i-1]
   325  	}
   326  	lineLen[len(lines)-1] = int32(len(lines[len(lines)-1]) + len(lineEnd))
   327  	return &Normalizer{int32(len(text)), lineLen, prefixLen}
   328  }
   329  
   330  // Location returns a normalized location within the Normalizer's text.
   331  // Normalized FILE locations have no start/end points.  Normalized SPAN
   332  // locations have fully populated start/end points clamped in the range [0,
   333  // len(text)).
   334  func (n *Normalizer) Location(loc *xpb.Location) (*xpb.Location, error) {
   335  	nl := &xpb.Location{}
   336  	if loc == nil {
   337  		return nl, nil
   338  	}
   339  	nl.Ticket = loc.Ticket
   340  	nl.Kind = loc.Kind
   341  	if loc.Kind == xpb.Location_FILE {
   342  		return nl, nil
   343  	}
   344  
   345  	if loc.Span == nil {
   346  		return nil, errors.New("invalid SPAN: missing span")
   347  	} else if loc.Span.Start == nil {
   348  		return nil, errors.New("invalid SPAN: missing span start point")
   349  	} else if loc.Span.End == nil {
   350  		return nil, errors.New("invalid SPAN: missing span end point")
   351  	}
   352  
   353  	nl.Span = n.Span(loc.Span)
   354  
   355  	start, end := nl.Span.Start.ByteOffset, nl.Span.End.ByteOffset
   356  	if start > end {
   357  		return nil, fmt.Errorf("invalid SPAN: start (%d) is after end (%d)", start, end)
   358  	}
   359  	return nl, nil
   360  }
   361  
   362  // Span returns a Span with its start and end normalized.
   363  func (n *Normalizer) Span(s *cpb.Span) *cpb.Span {
   364  	if s == nil {
   365  		return nil
   366  	}
   367  	return &cpb.Span{
   368  		Start: n.Point(s.Start),
   369  		End:   n.Point(s.End),
   370  	}
   371  }
   372  
   373  // SpanOffsets returns a Span based on normalized start and end byte offsets.
   374  func (n *Normalizer) SpanOffsets(start, end int32) *cpb.Span {
   375  	return &cpb.Span{
   376  		Start: n.ByteOffset(start),
   377  		End:   n.ByteOffset(end),
   378  	}
   379  }
   380  
// lineEnd is the line terminator on which NewNormalizer splits text.
var lineEnd = []byte("\n")
   382  
   383  // Point returns a normalized point within the Normalizer's text.  A normalized
   384  // point has all of its fields set consistently and clamped within the range
   385  // [0,len(text)).
   386  func (n *Normalizer) Point(p *cpb.Point) *cpb.Point {
   387  	if p == nil {
   388  		return nil
   389  	}
   390  
   391  	if p.ByteOffset > 0 {
   392  		return n.ByteOffset(p.ByteOffset)
   393  	} else if p.LineNumber > 0 {
   394  		np := &cpb.Point{
   395  			LineNumber:   p.LineNumber,
   396  			ColumnOffset: p.ColumnOffset,
   397  		}
   398  
   399  		if totalLines := int32(len(n.lineLen)); p.LineNumber > totalLines {
   400  			np.LineNumber = totalLines
   401  			np.ColumnOffset = n.lineLen[np.LineNumber-1] - 1
   402  		}
   403  		if np.ColumnOffset < 0 {
   404  			np.ColumnOffset = 0
   405  		} else if np.ColumnOffset > 0 {
   406  			if lineLen := n.lineLen[np.LineNumber-1] - 1; p.ColumnOffset > lineLen {
   407  				np.ColumnOffset = lineLen
   408  			}
   409  		}
   410  
   411  		np.ByteOffset = n.prefixLen[np.LineNumber-1] + np.ColumnOffset
   412  
   413  		return np
   414  	}
   415  
   416  	return &cpb.Point{LineNumber: 1}
   417  }
   418  
   419  // ByteOffset returns a normalized point based on the given offset within the
   420  // Normalizer's text.  A normalized point has all of its fields set consistently
   421  // and clamped within the range [0,len(text)).
   422  func (n *Normalizer) ByteOffset(offset int32) *cpb.Point {
   423  	np := &cpb.Point{ByteOffset: offset}
   424  	if np.ByteOffset > n.textLen {
   425  		np.ByteOffset = n.textLen
   426  	}
   427  
   428  	np.LineNumber = int32(sort.Search(len(n.lineLen), func(i int) bool {
   429  		return n.prefixLen[i] > np.ByteOffset
   430  	}))
   431  	np.ColumnOffset = np.ByteOffset - n.prefixLen[np.LineNumber-1]
   432  
   433  	return np
   434  }