kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/span/span.go (about) 1 /* 2 * Copyright 2018 The Kythe Authors. All rights reserved. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Package span implements utilities to resolve byte offsets within a file to 18 // line and column numbers. 19 package span // import "kythe.io/kythe/go/util/span" 20 21 import ( 22 "bytes" 23 "errors" 24 "fmt" 25 "sort" 26 27 "kythe.io/kythe/go/util/log" 28 29 "github.com/sergi/go-diff/diffmatchpatch" 30 "google.golang.org/protobuf/proto" 31 32 cpb "kythe.io/kythe/proto/common_go_proto" 33 srvpb "kythe.io/kythe/proto/serving_go_proto" 34 xpb "kythe.io/kythe/proto/xref_go_proto" 35 ) 36 37 // InBounds reports whether [start,end) is bounded by the specified [startBoundary,endBoundary) span. 38 func InBounds(kind xpb.DecorationsRequest_SpanKind, start, end, startBoundary, endBoundary int32) bool { 39 switch kind { 40 case xpb.DecorationsRequest_WITHIN_SPAN: 41 return start >= startBoundary && end <= endBoundary 42 case xpb.DecorationsRequest_AROUND_SPAN: 43 return start <= startBoundary && end >= endBoundary 44 default: 45 log.Warningf("unknown DecorationsRequest_SpanKind: %v", kind) 46 } 47 return false 48 } 49 50 // Patcher uses a computed diff between two texts to map spans from the original 51 // text to the new text. 52 type Patcher struct { 53 spans []diff 54 } 55 56 // NewPatcher returns a Patcher based on the diff between oldText and newText. 57 func NewPatcher(oldText, newText []byte) (p *Patcher, err error) { 58 defer func() { 59 // dmp may panic on some large requests; catch it and return an error instead 60 if r := recover(); r != nil { 61 err = fmt.Errorf("diffmatchpatch panic: %v", r) 62 } 63 }() 64 dmp := diffmatchpatch.New() 65 diff := dmp.DiffCleanupEfficiency(dmp.DiffMain(string(oldText), string(newText), false)) 66 return &Patcher{mapToOffsets(diff)}, nil 67 } 68 69 // Marshal encodes the Patcher into a packed binary format. 70 func (p *Patcher) Marshal() ([]byte, error) { 71 db := &srvpb.Diff{ 72 SpanLength: make([]int32, len(p.spans)), 73 SpanType: make([]srvpb.Diff_Type, len(p.spans)), 74 SpanNewlines: make([]int32, len(p.spans)), 75 SpanFirstNewline: make([]int32, len(p.spans)), 76 SpanLastNewline: make([]int32, len(p.spans)), 77 } 78 for i, d := range p.spans { 79 db.SpanLength[i] = d.Length 80 db.SpanNewlines[i] = d.Newlines 81 db.SpanFirstNewline[i] = d.FirstNewline 82 db.SpanLastNewline[i] = d.LastNewline 83 switch d.Type { 84 case eq: 85 db.SpanType[i] = srvpb.Diff_EQUAL 86 case ins: 87 db.SpanType[i] = srvpb.Diff_INSERT 88 case del: 89 db.SpanType[i] = srvpb.Diff_DELETE 90 default: 91 return nil, fmt.Errorf("unknown diff type: %s", d.Type) 92 } 93 } 94 return proto.Marshal(db) 95 } 96 97 // Unmarshal decodes a Patcher from its packed binary format. 98 func Unmarshal(rec []byte) (*Patcher, error) { 99 var db srvpb.Diff 100 if err := proto.Unmarshal(rec, &db); err != nil { 101 return nil, err 102 } 103 if len(db.SpanLength) != len(db.SpanType) { 104 return nil, fmt.Errorf("length of span_length does not match length of span_type: %d vs %d", len(db.SpanLength), len(db.SpanType)) 105 } else if len(db.SpanLength) != len(db.SpanNewlines) { 106 return nil, fmt.Errorf("length of span_length does not match length of span_newlines: %d vs %d", len(db.SpanLength), len(db.SpanNewlines)) 107 } else if len(db.SpanLength) != len(db.SpanFirstNewline) { 108 return nil, fmt.Errorf("length of span_length does not match length of span_first_newline: %d vs %d", len(db.SpanLength), len(db.SpanFirstNewline)) 109 } else if len(db.SpanLength) != len(db.SpanLastNewline) { 110 return nil, fmt.Errorf("length of span_length does not match length of span_last_newline: %d vs %d", len(db.SpanLength), len(db.SpanLastNewline)) 111 } 112 spans := make([]diff, len(db.SpanLength)) 113 for i, l := range db.SpanLength { 114 spans[i] = diff{ 115 Length: l, 116 Newlines: db.SpanNewlines[i], 117 FirstNewline: db.SpanFirstNewline[i], 118 LastNewline: db.SpanLastNewline[i], 119 } 120 switch db.SpanType[i] { 121 case srvpb.Diff_EQUAL: 122 spans[i].Type = eq 123 case srvpb.Diff_INSERT: 124 spans[i].Type = ins 125 case srvpb.Diff_DELETE: 126 spans[i].Type = del 127 default: 128 return nil, fmt.Errorf("unknown diff type: %s", db.SpanType[i]) 129 } 130 if i != 0 { 131 updatePrefix(&spans[i-1], &spans[i]) 132 } 133 } 134 return &Patcher{spans}, nil 135 } 136 137 func updatePrefix(prev, d *diff) { 138 d.oldPrefix = prev.oldPrefix 139 d.newPrefix = prev.newPrefix 140 d.oldPrefix.Type = del 141 d.newPrefix.Type = ins 142 d.oldPrefix.Update(*prev) 143 d.newPrefix.Update(*prev) 144 } 145 146 type diff struct { 147 Length int32 148 Type diffmatchpatch.Operation 149 150 Newlines int32 151 FirstNewline int32 152 LastNewline int32 153 154 oldPrefix, newPrefix offsetTracker 155 } 156 157 const ( 158 eq = diffmatchpatch.DiffEqual 159 ins = diffmatchpatch.DiffInsert 160 del = diffmatchpatch.DiffDelete 161 ) 162 163 func mapToOffsets(ds []diffmatchpatch.Diff) []diff { 164 res := make([]diff, len(ds)) 165 for i, d := range ds { 166 l := len(d.Text) 167 var newlines int 168 var first, last int = -1, -1 169 for j := 0; j < l; j++ { 170 if d.Text[j] != '\n' { 171 continue 172 } 173 newlines++ 174 if first == -1 { 175 first = j 176 } 177 last = j 178 } 179 res[i] = diff{ 180 Length: int32(l), 181 Type: d.Type, 182 Newlines: int32(newlines), 183 FirstNewline: int32(first), 184 LastNewline: int32(last), 185 } 186 if i != 0 { 187 updatePrefix(&res[i-1], &res[i]) 188 } 189 } 190 return res 191 } 192 193 type offsetTracker struct { 194 Type diffmatchpatch.Operation 195 196 Offset int32 197 Lines int32 198 ColumnOffset int32 199 } 200 201 func (t *offsetTracker) Update(d diff) { 202 if d.Type != eq && d.Type != t.Type { 203 return 204 } 205 t.Offset += d.Length 206 t.Lines += d.Newlines 207 if d.LastNewline == -1 { 208 t.ColumnOffset += d.Length 209 } else { 210 t.ColumnOffset = d.Length - d.LastNewline - 1 211 } 212 } 213 214 // PatchSpan returns the resulting Span of mapping the given Span from the 215 // Patcher's constructed oldText to its newText. If the span no longer exists 216 // in newText or is invalid, the returned bool will be false. As a convenience, 217 // if p==nil, the original span will be returned. 218 func (p *Patcher) PatchSpan(s *cpb.Span) (span *cpb.Span, exists bool) { 219 spanStart, spanEnd := ByteOffsets(s) 220 if spanStart > spanEnd { 221 return nil, false 222 } else if p == nil || s == nil { 223 return s, true 224 } 225 226 // Find the diff span that contains the starting offset. 227 idx := sort.Search(len(p.spans), func(i int) bool { 228 return spanStart < p.spans[i].oldPrefix.Offset 229 }) - 1 230 if idx < 0 { 231 return nil, false 232 } 233 234 d := p.spans[idx] 235 if d.Type != eq || spanEnd > d.oldPrefix.Offset+d.Length { 236 return nil, false 237 } 238 239 lineDiff := d.newPrefix.Lines - d.oldPrefix.Lines 240 colDiff := d.newPrefix.ColumnOffset - d.oldPrefix.ColumnOffset 241 if d.FirstNewline != -1 && spanStart-d.oldPrefix.Offset >= d.FirstNewline { 242 // The given span is past the first newline so it has no column diff. 243 colDiff = 0 244 } 245 return &cpb.Span{ 246 Start: &cpb.Point{ 247 ByteOffset: d.newPrefix.Offset + (spanStart - d.oldPrefix.Offset), 248 ColumnOffset: s.GetStart().GetColumnOffset() + colDiff, 249 LineNumber: s.GetStart().GetLineNumber() + lineDiff, 250 }, 251 End: &cpb.Point{ 252 ByteOffset: d.newPrefix.Offset + (spanEnd - d.oldPrefix.Offset), 253 ColumnOffset: s.GetEnd().GetColumnOffset() + colDiff, 254 LineNumber: s.GetEnd().GetLineNumber() + lineDiff, 255 }, 256 }, true 257 } 258 259 // ByteOffsets returns the starting and ending byte offsets of the Span. 260 func ByteOffsets(s *cpb.Span) (int32, int32) { 261 return s.GetStart().GetByteOffset(), s.GetEnd().GetByteOffset() 262 } 263 264 // Patch returns the resulting span of mapping the given span from the Patcher's 265 // constructed oldText to its newText. If the span no longer exists in newText 266 // or is invalid, the returned bool will be false. As a convenience, if p==nil, 267 // the original span will be returned. 268 func (p *Patcher) Patch(spanStart, spanEnd int32) (newStart, newEnd int32, exists bool) { 269 if spanStart > spanEnd { 270 return 0, 0, false 271 } else if p == nil { 272 return spanStart, spanEnd, true 273 } 274 275 if spanStart == spanEnd { 276 // Give zero-width span a positive length for the below algorithm; then fix 277 // the length on return. 278 spanEnd++ 279 defer func() { newEnd = newStart }() 280 } 281 282 var old, new int32 283 for _, d := range p.spans { 284 l := d.Length 285 if old > spanStart { 286 return 0, 0, false 287 } 288 switch d.Type { 289 case eq: 290 if old <= spanStart && spanEnd <= old+l { 291 newStart = new + (spanStart - old) 292 newEnd = new + (spanEnd - old) 293 exists = true 294 return 295 } 296 old += l 297 new += l 298 case del: 299 old += l 300 case ins: 301 new += l 302 } 303 } 304 305 return 0, 0, false 306 } 307 308 // Normalizer fixes xref.Locations within a given source text so that each point 309 // has consistent byte_offset, line_number, and column_offset fields within the 310 // range of text's length and its line lengths. 311 type Normalizer struct { 312 textLen int32 313 lineLen []int32 314 prefixLen []int32 315 } 316 317 // NewNormalizer returns a Normalizer for Locations within text. 318 func NewNormalizer(text []byte) *Normalizer { 319 lines := bytes.Split(text, lineEnd) 320 lineLen := make([]int32, len(lines)) 321 prefixLen := make([]int32, len(lines)) 322 for i := 1; i < len(lines); i++ { 323 lineLen[i-1] = int32(len(lines[i-1]) + len(lineEnd)) 324 prefixLen[i] = prefixLen[i-1] + lineLen[i-1] 325 } 326 lineLen[len(lines)-1] = int32(len(lines[len(lines)-1]) + len(lineEnd)) 327 return &Normalizer{int32(len(text)), lineLen, prefixLen} 328 } 329 330 // Location returns a normalized location within the Normalizer's text. 331 // Normalized FILE locations have no start/end points. Normalized SPAN 332 // locations have fully populated start/end points clamped in the range [0, 333 // len(text)). 334 func (n *Normalizer) Location(loc *xpb.Location) (*xpb.Location, error) { 335 nl := &xpb.Location{} 336 if loc == nil { 337 return nl, nil 338 } 339 nl.Ticket = loc.Ticket 340 nl.Kind = loc.Kind 341 if loc.Kind == xpb.Location_FILE { 342 return nl, nil 343 } 344 345 if loc.Span == nil { 346 return nil, errors.New("invalid SPAN: missing span") 347 } else if loc.Span.Start == nil { 348 return nil, errors.New("invalid SPAN: missing span start point") 349 } else if loc.Span.End == nil { 350 return nil, errors.New("invalid SPAN: missing span end point") 351 } 352 353 nl.Span = n.Span(loc.Span) 354 355 start, end := nl.Span.Start.ByteOffset, nl.Span.End.ByteOffset 356 if start > end { 357 return nil, fmt.Errorf("invalid SPAN: start (%d) is after end (%d)", start, end) 358 } 359 return nl, nil 360 } 361 362 // Span returns a Span with its start and end normalized. 363 func (n *Normalizer) Span(s *cpb.Span) *cpb.Span { 364 if s == nil { 365 return nil 366 } 367 return &cpb.Span{ 368 Start: n.Point(s.Start), 369 End: n.Point(s.End), 370 } 371 } 372 373 // SpanOffsets returns a Span based on normalized start and end byte offsets. 374 func (n *Normalizer) SpanOffsets(start, end int32) *cpb.Span { 375 return &cpb.Span{ 376 Start: n.ByteOffset(start), 377 End: n.ByteOffset(end), 378 } 379 } 380 381 var lineEnd = []byte("\n") 382 383 // Point returns a normalized point within the Normalizer's text. A normalized 384 // point has all of its fields set consistently and clamped within the range 385 // [0,len(text)). 386 func (n *Normalizer) Point(p *cpb.Point) *cpb.Point { 387 if p == nil { 388 return nil 389 } 390 391 if p.ByteOffset > 0 { 392 return n.ByteOffset(p.ByteOffset) 393 } else if p.LineNumber > 0 { 394 np := &cpb.Point{ 395 LineNumber: p.LineNumber, 396 ColumnOffset: p.ColumnOffset, 397 } 398 399 if totalLines := int32(len(n.lineLen)); p.LineNumber > totalLines { 400 np.LineNumber = totalLines 401 np.ColumnOffset = n.lineLen[np.LineNumber-1] - 1 402 } 403 if np.ColumnOffset < 0 { 404 np.ColumnOffset = 0 405 } else if np.ColumnOffset > 0 { 406 if lineLen := n.lineLen[np.LineNumber-1] - 1; p.ColumnOffset > lineLen { 407 np.ColumnOffset = lineLen 408 } 409 } 410 411 np.ByteOffset = n.prefixLen[np.LineNumber-1] + np.ColumnOffset 412 413 return np 414 } 415 416 return &cpb.Point{LineNumber: 1} 417 } 418 419 // ByteOffset returns a normalized point based on the given offset within the 420 // Normalizer's text. A normalized point has all of its fields set consistently 421 // and clamped within the range [0,len(text)). 422 func (n *Normalizer) ByteOffset(offset int32) *cpb.Point { 423 np := &cpb.Point{ByteOffset: offset} 424 if np.ByteOffset > n.textLen { 425 np.ByteOffset = n.textLen 426 } 427 428 np.LineNumber = int32(sort.Search(len(n.lineLen), func(i int) bool { 429 return n.prefixLen[i] > np.ByteOffset 430 })) 431 np.ColumnOffset = np.ByteOffset - n.prefixLen[np.LineNumber-1] 432 433 return np 434 }