kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/html/html_test.go (about)

     1  /*
     2   * Copyright 2014 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package html
    18  
    19  import (
    20  	"bytes"
    21  	"testing"
    22  
    23  	"golang.org/x/net/html"
    24  )
    25  
// testHTML is the HTML fragment parsed by the tests in this file. Its plain
// text is "test body text" (14 bytes), which the offset expectations in the
// test tables are based on.
const testHTML = "<b>test</b> <span>body <i>text</i></span>"
    27  
    28  func TestSliceNode(t *testing.T) {
    29  	root := parseHTML(t, testHTML)
    30  	fullPlain := PlainText(root)
    31  	o := textualOffsets(root)
    32  
    33  	tests := []struct {
    34  		path              string
    35  		start, split, end int
    36  	}{
    37  		{"flf", 0, 2, 4}, // <b>
    38  		{"flfn", 2, 3, 4},
    39  		{"flfnnnn", 5, 7, 14}, // <span>
    40  		{"flfnnnnf", 5, 6, 7},
    41  		{"flfnnnn", 5, 6, 7},
    42  		{"flfnnnnnn", 7, 12, 14},
    43  	}
    44  
    45  	for _, test := range tests {
    46  		node := MustZip(root, test.path)
    47  		plain := PlainText(node)
    48  		left, right := sliceNode(o, node, test.split)
    49  		checkBounds(t, o, left, test.start, test.split)
    50  		checkBounds(t, o, right, test.split, test.end)
    51  
    52  		if resultPlain := PlainText(left, right); plain != resultPlain {
    53  			t.Errorf("text: Expected %q; Found %q", plain, resultPlain)
    54  		}
    55  		if leftPlain, expected := PlainText(left), fullPlain[test.start:test.split]; leftPlain != expected {
    56  			t.Errorf("left text: Expected %q; Found %q", leftPlain, expected)
    57  		}
    58  		if rightPlain, expected := PlainText(right), fullPlain[test.split:test.end]; rightPlain != expected {
    59  			t.Errorf("right text: Expected %q; Found %q", rightPlain, expected)
    60  		}
    61  
    62  		if res := MustZip(root, test.path); left != res {
    63  			t.Errorf("left: Expected %v; Found %v", left, res)
    64  		}
    65  		if res := MustZip(root, test.path+"n"); right != res {
    66  			t.Errorf("right: Expected %v; Found %v", right, res)
    67  		}
    68  	}
    69  }
    70  
    71  func TestOffsets(t *testing.T) {
    72  	n := parseHTML(t, testHTML)
    73  	o := textualOffsets(n)
    74  
    75  	tests := []struct {
    76  		path       string
    77  		start, end int
    78  	}{
    79  		{"", 0, 14},
    80  		{"f", 0, 14},  // <html>
    81  		{"ff", 0, 0},  // <title>
    82  		{"fl", 0, 14}, // <body>
    83  		{"flf", 0, 4}, // <b>
    84  		{"flff", 0, 4},
    85  		{"flfn", 4, 5},
    86  		{"flfnn", 5, 14}, // <span>
    87  		{"flfnnf", 5, 10},
    88  		{"flfnnfn", 10, 14}, // <i>
    89  		{"flfnnfnf", 10, 14},
    90  	}
    91  
    92  	for _, test := range tests {
    93  		checkBounds(t, o, MustZip(n, test.path), test.start, test.end)
    94  	}
    95  }
    96  
    97  func checkBounds(t *testing.T, offsets *nodeOffsets, n *html.Node, start, end int) {
    98  	if s, e := offsets.Bounds(n); s != start {
    99  		t.Errorf("checkBounds: start expected %d; received %d", start, s)
   100  	} else if e != end {
   101  		t.Errorf("checkBounds: end expected %d; received %d", end, e)
   102  	}
   103  }
   104  
   105  func parseHTML(t *testing.T, s string) *html.Node {
   106  	buf := new(bytes.Buffer)
   107  	_, err := buf.Write([]byte(s))
   108  	if err != nil {
   109  		t.Error("Could not write string to Buffer")
   110  	}
   111  	n, err := html.Parse(buf)
   112  	if err != nil {
   113  		t.Error("Could not parse HTML")
   114  	}
   115  	return n
   116  }
   117  
   118  func TestZip(t *testing.T) {
   119  	n := parseHTML(t, testHTML)
   120  
   121  	tests := []struct {
   122  		expected *html.Node
   123  		path     string
   124  	}{
   125  		{n, ""},
   126  		{n.FirstChild, "f"},
   127  		{n.FirstChild.FirstChild.NextSibling, "ffn"},
   128  		{n.FirstChild, "ffnu"},
   129  		{n.Parent, "u"},
   130  		{n.LastChild.LastChild, "ll"},
   131  		{n.LastChild.LastChild.FirstChild.NextSibling, "llfnnp"},
   132  	}
   133  
   134  	for _, test := range tests {
   135  		if res, err := Zip(n, test.path); err != nil {
   136  			t.Error(err)
   137  		} else if res != test.expected {
   138  			t.Errorf("Path %q; Expected %v; Received %v", test.path, test.expected, res)
   139  		}
   140  	}
   141  }
   142  
   143  func TestPlainText(t *testing.T) {
   144  	root := parseHTML(t, testHTML)
   145  
   146  	tests := []struct{ path, expected string }{
   147  		{"", "test body text"},
   148  		{"fl", "test body text"},
   149  		{"flf", "test"},
   150  		{"flff", "test"},
   151  		{"flfn", " "},
   152  		{"flfnn", "body text"},
   153  		{"flfnnf", "body "},
   154  		{"flfnnl", "text"},
   155  		{"flfnnlf", "text"},
   156  	}
   157  
   158  	for _, test := range tests {
   159  		if res := PlainText(MustZip(root, test.path)); test.expected != res {
   160  			t.Errorf("Path %q; Expected %q; Found %q", test.path, test.expected, res)
   161  		}
   162  	}
   163  }