github.com/go-xe2/third@v1.0.3/golang.org/x/text/unicode/rangetable/merge.go (about) 1 // Copyright 2015 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package rangetable 6 7 import ( 8 "unicode" 9 ) 10 11 // atEnd is used to mark a completed iteration. 12 const atEnd = unicode.MaxRune + 1 13 14 // Merge returns a new RangeTable that is the union of the given tables. 15 // It can also be used to compact user-created RangeTables. The entries in 16 // R16 and R32 for any given RangeTable should be sorted and non-overlapping. 17 // 18 // A lookup in the resulting table can be several times faster than using In 19 // directly on the ranges. Merge is an expensive operation, however, and only 20 // makes sense if one intends to use the result for more than a couple of 21 // hundred lookups. 22 func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable { 23 rt := &unicode.RangeTable{} 24 if len(ranges) == 0 { 25 return rt 26 } 27 28 iter := tablesIter(make([]tableIndex, len(ranges))) 29 30 for i, t := range ranges { 31 iter[i] = tableIndex{t, 0, atEnd} 32 if len(t.R16) > 0 { 33 iter[i].next = rune(t.R16[0].Lo) 34 } 35 } 36 37 if r0 := iter.next16(); r0.Stride != 0 { 38 for { 39 r1 := iter.next16() 40 if r1.Stride == 0 { 41 rt.R16 = append(rt.R16, r0) 42 break 43 } 44 stride := r1.Lo - r0.Hi 45 if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { 46 // Fully merge the next range into the previous one. 47 r0.Hi, r0.Stride = r1.Hi, stride 48 continue 49 } else if stride == r0.Stride { 50 // Move the first element of r1 to r0. This may eliminate an 51 // entry. 52 r0.Hi = r1.Lo 53 r0.Stride = stride 54 r1.Lo = r1.Lo + r1.Stride 55 if r1.Lo > r1.Hi { 56 continue 57 } 58 } 59 rt.R16 = append(rt.R16, r0) 60 r0 = r1 61 } 62 } 63 64 for i, t := range ranges { 65 iter[i] = tableIndex{t, 0, atEnd} 66 if len(t.R32) > 0 { 67 iter[i].next = rune(t.R32[0].Lo) 68 } 69 } 70 71 if r0 := iter.next32(); r0.Stride != 0 { 72 for { 73 r1 := iter.next32() 74 if r1.Stride == 0 { 75 rt.R32 = append(rt.R32, r0) 76 break 77 } 78 stride := r1.Lo - r0.Hi 79 if (r1.Lo == r1.Hi || stride == r1.Stride) && (r0.Lo == r0.Hi || stride == r0.Stride) { 80 // Fully merge the next range into the previous one. 81 r0.Hi, r0.Stride = r1.Hi, stride 82 continue 83 } else if stride == r0.Stride { 84 // Move the first element of r1 to r0. This may eliminate an 85 // entry. 86 r0.Hi = r1.Lo 87 r1.Lo = r1.Lo + r1.Stride 88 if r1.Lo > r1.Hi { 89 continue 90 } 91 } 92 rt.R32 = append(rt.R32, r0) 93 r0 = r1 94 } 95 } 96 97 for i := 0; i < len(rt.R16) && rt.R16[i].Hi <= unicode.MaxLatin1; i++ { 98 rt.LatinOffset = i + 1 99 } 100 101 return rt 102 } 103 104 type tableIndex struct { 105 t *unicode.RangeTable 106 p uint32 107 next rune 108 } 109 110 type tablesIter []tableIndex 111 112 // sortIter does an insertion sort using the next field of tableIndex. Insertion 113 // sort is a good sorting algorithm for this case. 114 func sortIter(t []tableIndex) { 115 for i := range t { 116 for j := i; j > 0 && t[j-1].next > t[j].next; j-- { 117 t[j], t[j-1] = t[j-1], t[j] 118 } 119 } 120 } 121 122 // next16 finds the ranged to be added to the table. If ranges overlap between 123 // multiple tables it clips the result to a non-overlapping range if the 124 // elements are not fully subsumed. It returns a zero range if there are no more 125 // ranges. 126 func (ti tablesIter) next16() unicode.Range16 { 127 sortIter(ti) 128 129 t0 := ti[0] 130 if t0.next == atEnd { 131 return unicode.Range16{} 132 } 133 r0 := t0.t.R16[t0.p] 134 r0.Lo = uint16(t0.next) 135 136 // We restrict the Hi of the current range if it overlaps with another range. 137 for i := range ti { 138 tn := ti[i] 139 // Since our tableIndices are sorted by next, we can break if the there 140 // is no overlap. The first value of a next range can always be merged 141 // into the current one, so we can break in case of equality as well. 142 if rune(r0.Hi) <= tn.next { 143 break 144 } 145 rn := tn.t.R16[tn.p] 146 rn.Lo = uint16(tn.next) 147 148 // Limit r0.Hi based on next ranges in list, but allow it to overlap 149 // with ranges as long as it subsumes it. 150 m := (rn.Lo - r0.Lo) % r0.Stride 151 if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { 152 // Overlap, take the min of the two Hi values: for simplicity's sake 153 // we only process one range at a time. 154 if r0.Hi > rn.Hi { 155 r0.Hi = rn.Hi 156 } 157 } else { 158 // Not a compatible stride. Set to the last possible value before 159 // rn.Lo, but ensure there is at least one value. 160 if x := rn.Lo - m; r0.Lo <= x { 161 r0.Hi = x 162 } 163 break 164 } 165 } 166 167 // Update the next values for each table. 168 for i := range ti { 169 tn := &ti[i] 170 if rune(r0.Hi) < tn.next { 171 break 172 } 173 rn := tn.t.R16[tn.p] 174 stride := rune(rn.Stride) 175 tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) 176 if rune(rn.Hi) < tn.next { 177 if tn.p++; int(tn.p) == len(tn.t.R16) { 178 tn.next = atEnd 179 } else { 180 tn.next = rune(tn.t.R16[tn.p].Lo) 181 } 182 } 183 } 184 185 if r0.Lo == r0.Hi { 186 r0.Stride = 1 187 } 188 189 return r0 190 } 191 192 // next32 finds the ranged to be added to the table. If ranges overlap between 193 // multiple tables it clips the result to a non-overlapping range if the 194 // elements are not fully subsumed. It returns a zero range if there are no more 195 // ranges. 196 func (ti tablesIter) next32() unicode.Range32 { 197 sortIter(ti) 198 199 t0 := ti[0] 200 if t0.next == atEnd { 201 return unicode.Range32{} 202 } 203 r0 := t0.t.R32[t0.p] 204 r0.Lo = uint32(t0.next) 205 206 // We restrict the Hi of the current range if it overlaps with another range. 207 for i := range ti { 208 tn := ti[i] 209 // Since our tableIndices are sorted by next, we can break if the there 210 // is no overlap. The first value of a next range can always be merged 211 // into the current one, so we can break in case of equality as well. 212 if rune(r0.Hi) <= tn.next { 213 break 214 } 215 rn := tn.t.R32[tn.p] 216 rn.Lo = uint32(tn.next) 217 218 // Limit r0.Hi based on next ranges in list, but allow it to overlap 219 // with ranges as long as it subsumes it. 220 m := (rn.Lo - r0.Lo) % r0.Stride 221 if m == 0 && (rn.Stride == r0.Stride || rn.Lo == rn.Hi) { 222 // Overlap, take the min of the two Hi values: for simplicity's sake 223 // we only process one range at a time. 224 if r0.Hi > rn.Hi { 225 r0.Hi = rn.Hi 226 } 227 } else { 228 // Not a compatible stride. Set to the last possible value before 229 // rn.Lo, but ensure there is at least one value. 230 if x := rn.Lo - m; r0.Lo <= x { 231 r0.Hi = x 232 } 233 break 234 } 235 } 236 237 // Update the next values for each table. 238 for i := range ti { 239 tn := &ti[i] 240 if rune(r0.Hi) < tn.next { 241 break 242 } 243 rn := tn.t.R32[tn.p] 244 stride := rune(rn.Stride) 245 tn.next += stride * (1 + ((rune(r0.Hi) - tn.next) / stride)) 246 if rune(rn.Hi) < tn.next { 247 if tn.p++; int(tn.p) == len(tn.t.R32) { 248 tn.next = atEnd 249 } else { 250 tn.next = rune(tn.t.R32[tn.p].Lo) 251 } 252 } 253 } 254 255 if r0.Lo == r0.Hi { 256 r0.Stride = 1 257 } 258 259 return r0 260 }