// Source: github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/table/editor/creation/index.go

// Copyright 2021 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package creation

import (
	"bytes"
	"context"
	"fmt"
	"io"
	"strings"

	"github.com/dolthub/go-mysql-server/sql"

	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb/durable"
	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/index"
	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
	"github.com/dolthub/dolt/go/store/prolly"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/types"
	"github.com/dolthub/dolt/go/store/val"
)

// CreateIndexReturn bundles the values that CreateIndex hands back to its
// caller on success.
type CreateIndexReturn struct {
	// NewTable is the table after its schema was updated and the rows of the
	// new index were written to it.
	NewTable *doltdb.Table
	// Sch is the table schema that the new index was added to.
	Sch schema.Schema
	// OldIndex is the result of looking up an existing index over the same
	// column set before the new index was added. It is populated whether or
	// not that index was replaced.
	// NOTE(review): presumably the zero value when no such index existed — confirm.
	OldIndex schema.Index
	// NewIndex is the index metadata that was added to Sch.
	NewIndex schema.Index
}

// CreateIndex creates the given index on the given table with the given schema. Returns the updated table, updated schema, and created index.
45 func CreateIndex( 46 ctx *sql.Context, 47 table *doltdb.Table, 48 tableName, indexName string, 49 columns []string, 50 prefixLengths []uint16, 51 props schema.IndexProperties, 52 opts editor.Options, 53 ) (*CreateIndexReturn, error) { 54 sch, err := table.GetSchema(ctx) 55 if err != nil { 56 return nil, err 57 } 58 59 // get the real column names as CREATE INDEX columns are case-insensitive 60 var realColNames []string 61 allTableCols := sch.GetAllCols() 62 for _, indexCol := range columns { 63 tableCol, ok := allTableCols.GetByNameCaseInsensitive(indexCol) 64 if !ok { 65 return nil, fmt.Errorf("column `%s` does not exist for the table", indexCol) 66 } 67 realColNames = append(realColNames, tableCol.Name) 68 } 69 70 if indexName == "" { 71 indexName = strings.Join(realColNames, "") 72 _, ok := sch.Indexes().GetByNameCaseInsensitive(indexName) 73 var i int 74 for ok { 75 i++ 76 indexName = fmt.Sprintf("%s_%d", strings.Join(realColNames, ""), i) 77 _, ok = sch.Indexes().GetByNameCaseInsensitive(indexName) 78 } 79 } 80 if !doltdb.IsValidIdentifier(indexName) { 81 return nil, fmt.Errorf("invalid index name `%s`", indexName) 82 } 83 84 // if an index was already created for the column set but was not generated by the user then we replace it 85 existingIndex, ok := sch.Indexes().GetIndexByColumnNames(realColNames...) 
86 if ok && !existingIndex.IsUserDefined() { 87 _, err = sch.Indexes().RemoveIndex(existingIndex.Name()) 88 if err != nil { 89 return nil, err 90 } 91 table, err = table.DeleteIndexRowData(ctx, existingIndex.Name()) 92 if err != nil { 93 return nil, err 94 } 95 } 96 97 // create the index metadata, will error if index names are taken or an index with the same columns in the same order exists 98 index, err := sch.Indexes().AddIndexByColNames( 99 indexName, 100 realColNames, 101 prefixLengths, 102 props, 103 ) 104 if err != nil { 105 return nil, err 106 } 107 108 // update the table schema with the new index 109 newTable, err := table.UpdateSchema(ctx, sch) 110 if err != nil { 111 return nil, err 112 } 113 114 // TODO: in the case that we're replacing an implicit index with one the user specified, we could do this more 115 // cheaply in some cases by just renaming it, rather than building it from scratch. But that's harder to get right. 116 indexRows, err := BuildSecondaryIndex(ctx, newTable, index, tableName, opts) 117 if err != nil { 118 return nil, err 119 } 120 121 newTable, err = newTable.SetIndexRows(ctx, index.Name(), indexRows) 122 if err != nil { 123 return nil, err 124 } 125 126 return &CreateIndexReturn{ 127 NewTable: newTable, 128 Sch: sch, 129 OldIndex: existingIndex, 130 NewIndex: index, 131 }, nil 132 } 133 134 func BuildSecondaryIndex(ctx *sql.Context, tbl *doltdb.Table, idx schema.Index, tableName string, opts editor.Options) (durable.Index, error) { 135 switch tbl.Format() { 136 case types.Format_LD_1: 137 m, err := editor.RebuildIndex(ctx, tbl, idx.Name(), opts) 138 if err != nil { 139 return nil, err 140 } 141 return durable.IndexFromNomsMap(m, tbl.ValueReadWriter(), tbl.NodeStore()), nil 142 143 case types.Format_DOLT: 144 sch, err := tbl.GetSchema(ctx) 145 if err != nil { 146 return nil, err 147 } 148 m, err := tbl.GetRowData(ctx) 149 if err != nil { 150 return nil, err 151 } 152 primary := durable.ProllyMapFromIndex(m) 153 return 
BuildSecondaryProllyIndex(ctx, tbl.ValueReadWriter(), tbl.NodeStore(), sch, tableName, idx, primary) 154 155 default: 156 return nil, fmt.Errorf("unknown NomsBinFormat") 157 } 158 } 159 160 // BuildSecondaryProllyIndex builds secondary index data for the given primary 161 // index row data |primary|. |sch| is the current schema of the table. 162 func BuildSecondaryProllyIndex( 163 ctx *sql.Context, 164 vrw types.ValueReadWriter, 165 ns tree.NodeStore, 166 sch schema.Schema, 167 tableName string, 168 idx schema.Index, 169 primary prolly.Map, 170 ) (durable.Index, error) { 171 var uniqCb DupEntryCb 172 if idx.IsUnique() { 173 kd := idx.Schema().GetKeyDescriptor() 174 uniqCb = func(ctx context.Context, existingKey, newKey val.Tuple) error { 175 msg := FormatKeyForUniqKeyErr(newKey, kd) 176 return sql.NewUniqueKeyErr(msg, false, nil) 177 } 178 } 179 return BuildProllyIndexExternal(ctx, vrw, ns, sch, tableName, idx, primary, uniqCb) 180 } 181 182 // FormatKeyForUniqKeyErr formats the given tuple |key| using |d|. The resulting 183 // string is suitable for use in a sql.UniqueKeyError 184 // This is copied from the writer package to avoid pulling in that dependency and prevent cycles 185 func FormatKeyForUniqKeyErr(key val.Tuple, d val.TupleDesc) string { 186 var sb strings.Builder 187 sb.WriteString("[") 188 seenOne := false 189 for i := range d.Types { 190 if seenOne { 191 sb.WriteString(",") 192 } 193 seenOne = true 194 sb.WriteString(d.FormatValue(i, key.GetField(i))) 195 } 196 sb.WriteString("]") 197 return sb.String() 198 } 199 200 // DupEntryCb receives duplicate unique index entries. 201 type DupEntryCb func(ctx context.Context, existingKey, newKey val.Tuple) error 202 203 // BuildUniqueProllyIndex builds a unique index based on the given |primary| row 204 // data. If any duplicate entries are found, they are passed to |cb|. If |cb| 205 // returns a non-nil error then the process is stopped. 
206 func BuildUniqueProllyIndex( 207 ctx *sql.Context, 208 vrw types.ValueReadWriter, 209 ns tree.NodeStore, 210 sch schema.Schema, 211 tableName string, 212 idx schema.Index, 213 primary prolly.Map, 214 cb DupEntryCb, 215 ) (durable.Index, error) { 216 empty, err := durable.NewEmptyIndex(ctx, vrw, ns, idx.Schema()) 217 if err != nil { 218 return nil, err 219 } 220 secondary := durable.ProllyMapFromIndex(empty) 221 if schema.IsKeyless(sch) { 222 secondary = prolly.ConvertToSecondaryKeylessIndex(secondary) 223 } 224 225 iter, err := primary.IterAll(ctx) 226 if err != nil { 227 return nil, err 228 } 229 p := primary.Pool() 230 231 prefixDesc := secondary.KeyDesc().PrefixDesc(idx.Count()) 232 secondaryBld, err := index.NewSecondaryKeyBuilder(ctx, tableName, sch, idx, secondary.KeyDesc(), p, secondary.NodeStore()) 233 if err != nil { 234 return nil, err 235 } 236 237 mut := secondary.Mutate() 238 for { 239 var k, v val.Tuple 240 k, v, err = iter.Next(ctx) 241 if err == io.EOF { 242 break 243 } else if err != nil { 244 return nil, err 245 } 246 247 idxKey, err := secondaryBld.SecondaryKeyFromRow(ctx, k, v) 248 if err != nil { 249 return nil, err 250 } 251 252 if prefixDesc.HasNulls(idxKey) { 253 continue 254 } 255 256 err = mut.GetPrefix(ctx, idxKey, prefixDesc, func(existingKey, _ val.Tuple) error { 257 // register a constraint violation if |idxKey| collides with |existingKey| 258 if existingKey != nil { 259 return cb(ctx, existingKey, idxKey) 260 } 261 return nil 262 }) 263 if err != nil { 264 return nil, err 265 } 266 267 if err = mut.Put(ctx, idxKey, val.EmptyTuple); err != nil { 268 return nil, err 269 } 270 } 271 272 secondary, err = mut.Map(ctx) 273 if err != nil { 274 return nil, err 275 } 276 return durable.IndexFromProllyMap(secondary), nil 277 } 278 279 // PrefixItr iterates all keys of a given prefix |p| and its descriptor |d| in map |m|. 
280 // todo(andy): move to pkg prolly 281 type PrefixItr struct { 282 itr prolly.MapIter 283 p val.Tuple 284 d val.TupleDesc 285 } 286 287 func NewPrefixItr(ctx context.Context, p val.Tuple, d val.TupleDesc, m rangeIterator) (PrefixItr, error) { 288 rng := prolly.PrefixRange(p, d) 289 itr, err := m.IterRange(ctx, rng) 290 if err != nil { 291 return PrefixItr{}, err 292 } 293 return PrefixItr{p: p, d: d, itr: itr}, nil 294 } 295 296 func (itr PrefixItr) Next(ctx context.Context) (k, v val.Tuple, err error) { 297 OUTER: 298 for { 299 k, v, err = itr.itr.Next(ctx) 300 if err != nil { 301 return nil, nil, err 302 } 303 304 // check if p is a prefix of k 305 // range iteration currently can return keys not in the range 306 for i := 0; i < itr.p.Count(); i++ { 307 f1 := itr.p.GetField(i) 308 f2 := k.GetField(i) 309 if bytes.Compare(f1, f2) != 0 { 310 // if a field in the prefix does not match |k|, go to the next row 311 continue OUTER 312 } 313 } 314 315 return k, v, nil 316 } 317 } 318 319 type rangeIterator interface { 320 IterRange(ctx context.Context, rng prolly.Range) (prolly.MapIter, error) 321 } 322 323 var _ error = (*prollyUniqueKeyErr)(nil) 324 325 // prollyUniqueKeyErr is an error that is returned when a unique constraint has been violated. It contains the index key 326 // (which is the full row). 327 type prollyUniqueKeyErr struct { 328 k val.Tuple 329 kd val.TupleDesc 330 IndexName string 331 } 332 333 // Error implements the error interface. 334 func (u *prollyUniqueKeyErr) Error() string { 335 keyStr, _ := formatKey(u.k, u.kd) 336 return fmt.Sprintf("duplicate unique key given: %s", keyStr) 337 } 338 339 // formatKey returns a comma-separated string representation of the key given 340 // that matches the output of the old format. 
341 func formatKey(key val.Tuple, td val.TupleDesc) (string, error) { 342 vals := make([]string, td.Count()) 343 for i := 0; i < td.Count(); i++ { 344 vals[i] = td.FormatValue(i, key.GetField(i)) 345 } 346 347 return fmt.Sprintf("[%s]", strings.Join(vals, ",")), nil 348 }