go.chromium.org/luci@v0.0.0-20240309015107-7cdc2e660f33/common/data/text/document.go (about)

     1  // Copyright 2019 The LUCI Authors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package text
    16  
    17  import (
    18  	"strings"
    19  	"unicode"
    20  	"unicode/utf8"
    21  )
    22  
    23  // Doc transforms doc:
    24  //
    25  //  1. Strips leading and trailing whitespace.
    26  //  2. Removes common '\t' indentation.
    27  //  3. Replaces '\n' not-followed by whitespace with ' '.
    28  //
    29  // See example.
    30  //
    31  // This function is not fast.
    32  func Doc(doc string) string {
    33  	lines := strings.Split(doc, "\n")
    34  
    35  	// Strip leading and trailing blank lines before computing common indentation.
    36  	for len(lines) > 0 && isBlank(lines[0]) {
    37  		lines = lines[1:]
    38  	}
    39  	for len(lines) > 0 && isBlank(lines[len(lines)-1]) {
    40  		lines = lines[:len(lines)-1]
    41  	}
    42  
    43  	// Compute common TAB indentation.
    44  	commonIndent := -1
    45  	for _, line := range lines {
    46  		if isBlank(line) {
    47  			continue
    48  		}
    49  
    50  		indent := 0
    51  		for indent < len(line) && line[indent] == '\t' {
    52  			indent++
    53  		}
    54  		if commonIndent == -1 || commonIndent > indent {
    55  			commonIndent = indent
    56  		}
    57  	}
    58  	if commonIndent == -1 {
    59  		commonIndent = 0
    60  	}
    61  
    62  	// Combine lines, but replace ('\n' followed by non-whitespace) with one
    63  	// space.
    64  	ret := &strings.Builder{}
    65  	ret.Grow(len(doc))
    66  	newParagraph := true
    67  	for _, line := range lines {
    68  		if isBlank(line) {
    69  			// This is a blank line between two paragraphs.
    70  			// Separate them with two '\n'.
    71  			ret.WriteString("\n\n")
    72  			newParagraph = true
    73  			continue
    74  		}
    75  
    76  		line = line[commonIndent:]
    77  
    78  		switch first, _ := utf8.DecodeRuneInString(line); {
    79  		// If the line starts with whitespace, preserve the structure.
    80  		case unicode.IsSpace(first):
    81  			ret.WriteRune('\n')
    82  
    83  		// Otherwise replace '\n' with ' ', unless it is a new paragraph.
    84  		case !newParagraph:
    85  			ret.WriteRune(' ')
    86  		}
    87  
    88  		ret.WriteString(line)
    89  		newParagraph = false
    90  	}
    91  	return strings.TrimSpace(ret.String())
    92  }
    93  
    94  func isBlank(line string) bool {
    95  	return strings.TrimSpace(line) == ""
    96  }