github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/diff/column_identity_test.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package diff_test 16 17 import ( 18 "testing" 19 20 "github.com/dolthub/dolt/go/store/val" 21 ) 22 23 type identityTest struct { 24 name string 25 left []table 26 right []table 27 matches []match 28 // non-matching tables omitted 29 } 30 31 type match struct { 32 leftTbl, rightTbl string 33 columnMatches [][2]string 34 // non-matching columns omitted 35 } 36 37 type table struct { 38 name string 39 cols []column 40 } 41 42 type column struct { 43 name string 44 enc val.Encoding 45 pk bool 46 47 // simulates heuristic column matching 48 // based on sampling fields from row data 49 sample []int 50 } 51 52 const ( 53 heuristicMatchThreshold = 0.5 54 ) 55 56 // Table matching follows a conservative algorithm: 57 // matching tables must have the same name and the same set 58 // of primary key column types (empty set for keyless tables). 59 // 60 // This algorithm could be extended to handle table renames 61 // by matching tables with equal primary key column types 62 // based on a heuristic sampling method. We could also expose 63 // user-defined mappings that manually specify table matches. 64 func TestTableMatching(t *testing.T) { 65 var tests = []identityTest{ 66 { 67 name: "smoke test", 68 left: []table{ 69 { 70 name: "t", 71 cols: []column{ 72 {name: "pk", enc: val.Int32Enc, pk: true}, 73 {name: "c0", enc: val.Int32Enc}, 74 }, 75 }, 76 }, 77 right: []table{ 78 { 79 name: "t", 80 cols: []column{ 81 {name: "pk", enc: val.Int32Enc, pk: true}, 82 {name: "c0", enc: val.Int32Enc}, 83 }, 84 }, 85 }, 86 matches: []match{ 87 { 88 leftTbl: "t", rightTbl: "t", 89 columnMatches: [][2]string{ 90 {"pk", "pk"}, 91 {"c0", "c0"}, 92 }, 93 }, 94 }, 95 }, 96 { 97 name: "primary key rename", 98 left: []table{ 99 { 100 name: "t", 101 cols: []column{ 102 {name: "a", enc: val.Int32Enc, pk: true}, 103 {name: "c0", enc: val.Int32Enc}, 104 }, 105 }, 106 }, 107 right: []table{ 108 { 109 name: "t", 110 cols: []column{ 111 {name: "b", enc: val.Int32Enc, pk: true}, 112 {name: "c0", enc: val.Int32Enc}, 113 }, 114 }, 115 }, 116 matches: []match{ 117 { 118 leftTbl: "t", rightTbl: "t", 119 columnMatches: [][2]string{ 120 {"pk", "a"}, 121 {"c0", "c0"}, 122 }, 123 }, 124 }, 125 }, 126 { 127 name: "keyless table", 128 left: []table{ 129 { 130 name: "t", 131 cols: []column{ 132 {name: "c0", enc: val.Int32Enc}, 133 {name: "c1", enc: val.Int32Enc}, 134 }, 135 }, 136 }, 137 right: []table{ 138 { 139 name: "t", 140 cols: []column{ 141 {name: "c0", enc: val.Int32Enc}, 142 {name: "c1", enc: val.Int32Enc}, 143 }, 144 }, 145 }, 146 matches: []match{ 147 { 148 leftTbl: "t", rightTbl: "t", 149 columnMatches: [][2]string{ 150 {"c0", "c0"}, 151 {"c1", "c1"}, 152 }, 153 }, 154 }, 155 }, 156 { 157 name: "table rename", 158 left: []table{ 159 { 160 name: "t1", 161 cols: []column{ 162 {name: "pk", enc: val.Int32Enc, pk: true}, 163 {name: "c0", enc: val.Int32Enc}, 164 }, 165 }, 166 }, 167 right: []table{ 168 { 169 name: "t2", 170 cols: []column{ 171 {name: "pk", enc: val.Int32Enc, pk: true}, 172 {name: "c0", enc: val.Int32Enc}, 173 }, 174 }, 175 }, 176 matches: []match{ /* no matches */ }, 177 }, 178 } 179 for _, test := range tests { 180 t.Run(test.name, func(t *testing.T) { 181 testIdentity(t, test) 182 }) 183 } 184 } 185 186 // Column matching follows table matching, 187 // primary keys have already been matched. 188 // Matching for non-primary-key is as follows: 189 // 1. equal name and type are matched 190 // 2a. keyless tables take union of remaining columns 191 // 2b. pk tables attempt to heuristically match remaining 192 // columns of equal types by sampling rows values 193 func TestColumnMatching(t *testing.T) { 194 var tests = []identityTest{ 195 { 196 name: "extra unmatched columns", 197 left: []table{ 198 { 199 name: "t", 200 cols: []column{ 201 {name: "pk", enc: val.Int32Enc, pk: true}, 202 {name: "a", enc: val.DatetimeEnc}, 203 }, 204 }, 205 }, 206 right: []table{ 207 { 208 name: "t", 209 cols: []column{ 210 {name: "pk", enc: val.Int32Enc, pk: true}, 211 {name: "b", enc: val.GeometryEnc}, 212 }, 213 }, 214 }, 215 matches: []match{ 216 { 217 leftTbl: "t", rightTbl: "t", 218 columnMatches: [][2]string{ 219 {"pk", "pk"}, 220 // columns 'a', 'b' unmatched 221 }, 222 }, 223 }, 224 }, 225 { 226 name: "unmatched columns with name collision", 227 left: []table{ 228 { 229 name: "t", 230 cols: []column{ 231 {name: "pk", enc: val.Int32Enc, pk: true}, 232 {name: "c0", enc: val.YearEnc}, 233 }, 234 }, 235 }, 236 right: []table{ 237 { 238 name: "t", 239 cols: []column{ 240 {name: "pk", enc: val.Int32Enc, pk: true}, 241 {name: "c0", enc: val.JSONEnc}, 242 }, 243 }, 244 }, 245 matches: []match{ 246 { 247 leftTbl: "t", rightTbl: "t", 248 columnMatches: [][2]string{ 249 {"pk", "pk"}, 250 // columns 'c0', 'c0' unmatched 251 }, 252 }, 253 }, 254 }, 255 { 256 name: "heuristic column matching", 257 left: []table{ 258 { 259 name: "t", 260 cols: []column{ 261 {name: "pk", enc: val.Int32Enc, pk: true}, 262 {name: "a", enc: val.Int64Enc, sample: []int{1, 2, 3, 4, 5}}, 263 {name: "b", enc: val.Int64Enc, sample: []int{6, 7, 8, 9, 10}}, 264 }, 265 }, 266 }, 267 right: []table{ 268 { 269 name: "t", 270 cols: []column{ 271 {name: "pk", enc: val.Int32Enc, pk: true}, 272 {name: "x", enc: val.Int64Enc, sample: []int{1, 2, 3, -4, -5}}, 273 {name: "y", enc: val.Int64Enc, sample: []int{6, 7, -8, -9, -10}}, 274 }, 275 }, 276 }, 277 matches: []match{ 278 { 279 leftTbl: "t", rightTbl: "t", 280 columnMatches: [][2]string{ 281 {"pk", "pk"}, 282 {"a", "x"}, 283 // columns 'b', 'y' unmatched 284 }, 285 }, 286 }, 287 }, 288 { 289 name: "keyless table union", 290 left: []table{ 291 { 292 name: "t", 293 cols: []column{ 294 {name: "c0", enc: val.Int32Enc, sample: []int{1, 2, 3, 4}}, 295 {name: "c1", enc: val.Int32Enc, sample: []int{5, 6, 7, 8}}, 296 }, 297 }, 298 }, 299 right: []table{ 300 { 301 name: "t", 302 cols: []column{ 303 {name: "c0", enc: val.Int32Enc, sample: []int{1, 2, 3, 4}}, 304 {name: "c2", enc: val.Int32Enc, sample: []int{5, 6, 7, 8}}, 305 }, 306 }, 307 }, 308 matches: []match{ 309 { 310 leftTbl: "t", rightTbl: "t", 311 columnMatches: [][2]string{ 312 {"c0", "c0"}, 313 // columns 'c1', 'c2' unmatched 314 }, 315 }, 316 }, 317 }, 318 } 319 for _, test := range tests { 320 t.Run(test.name, func(t *testing.T) { 321 testIdentity(t, test) 322 }) 323 } 324 } 325 326 func testIdentity(t *testing.T, test identityTest) { 327 t.Skip("implement me") 328 }