github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/migrate/dump.go (about) 1 /* 2 * Copyright 2022 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package migrate 18 19 import ( 20 "bufio" 21 "database/sql" 22 "fmt" 23 "strings" 24 25 "github.com/pkg/errors" 26 ) 27 28 // dumpMeta serves as the global knowledge oracle that stores 29 // all the tables' info, 30 // all the tables' generation guide, 31 // the writer to output the generated RDF entries, 32 // the writer to output the Dgraph schema, 33 // and a sqlPool to read information from MySQL 34 type dumpMeta struct { 35 tableInfos map[string]*sqlTable 36 tableGuides map[string]*tableGuide 37 dataWriter *bufio.Writer 38 schemaWriter *bufio.Writer 39 sqlPool *sql.DB 40 41 buf strings.Builder // reusable buf for building strings, call buf.Reset before use 42 } 43 44 // sqlRow captures values in a SQL table row, as well as the metadata associated 45 // with the row 46 type sqlRow struct { 47 values []interface{} 48 blankNodeLabel string 49 tableInfo *sqlTable 50 } 51 52 // dumpSchema generates the Dgraph schema based on m.tableGuides 53 // and sends the schema to m.schemaWriter 54 func (m *dumpMeta) dumpSchema() error { 55 for table := range m.tableGuides { 56 tableInfo := m.tableInfos[table] 57 for _, index := range createDgraphSchema(tableInfo) { 58 _, err := m.schemaWriter.WriteString(index) 59 if err != nil { 60 return errors.Wrapf(err, "while writing schema") 61 } 62 } 63 } 64 return m.schemaWriter.Flush() 65 } 66 67 // dumpTables goes through all the tables twice. In the first time it generates RDF entries for the 68 // column values. In the second time, it follows the foreign key constraints in SQL tables, and 69 // generate the corresponding Dgraph edges. 70 func (m *dumpMeta) dumpTables() error { 71 for table := range m.tableInfos { 72 fmt.Printf("Dumping table %s\n", table) 73 if err := m.dumpTable(table); err != nil { 74 return errors.Wrapf(err, "while dumping table %s", table) 75 } 76 } 77 78 for table := range m.tableInfos { 79 fmt.Printf("Dumping table constraints %s\n", table) 80 if err := m.dumpTableConstraints(table); err != nil { 81 return errors.Wrapf(err, "while dumping table %s", table) 82 } 83 } 84 85 return m.dataWriter.Flush() 86 } 87 88 // dumpTable converts the cells in a SQL table into RDF entries, 89 // and sends entries to the m.dataWriter 90 func (m *dumpMeta) dumpTable(table string) error { 91 tableGuide := m.tableGuides[table] 92 tableInfo := m.tableInfos[table] 93 94 query := fmt.Sprintf(`select %s from %s`, strings.Join(tableInfo.columnNames, ","), table) 95 rows, err := m.sqlPool.Query(query) 96 if err != nil { 97 return err 98 } 99 defer rows.Close() 100 101 // populate the predNames 102 for _, column := range tableInfo.columnNames { 103 tableInfo.predNames = append(tableInfo.predNames, 104 predicateName(tableInfo, column)) 105 } 106 107 row := &sqlRow{ 108 tableInfo: tableInfo, 109 } 110 111 for rows.Next() { 112 // step 1: read the row's column values 113 colValues, err := getColumnValues(tableInfo.columnNames, tableInfo.columnDataTypes, rows) 114 if err != nil { 115 return err 116 } 117 row.values = colValues 118 119 // step 2: output the column values in RDF format 120 row.blankNodeLabel = tableGuide.blankNode.generate(tableInfo, colValues) 121 m.outputRow(row, tableInfo) 122 123 // step 3: record mappings to the blankNodeLabel so that future tables can look up the 124 // blankNodeLabel 125 tableGuide.valuesRecorder.record(tableInfo, colValues, row.blankNodeLabel) 126 } 127 128 return nil 129 } 130 131 // dumpTableConstraints reads data from a table, and then generate RDF entries 132 // from a row to another row in a foreign table by following columns with foreign key constraints. 133 // It then sends the generated RDF entries to the m.dataWriter 134 func (m *dumpMeta) dumpTableConstraints(table string) error { 135 tableGuide := m.tableGuides[table] 136 tableInfo := m.tableInfos[table] 137 138 query := fmt.Sprintf(`select %s from %s`, strings.Join(tableInfo.columnNames, ","), table) 139 rows, err := m.sqlPool.Query(query) 140 if err != nil { 141 return err 142 } 143 defer rows.Close() 144 145 row := &sqlRow{ 146 tableInfo: tableInfo, 147 } 148 for rows.Next() { 149 // step 1: read the row's column values 150 colValues, err := getColumnValues(tableInfo.columnNames, tableInfo.columnDataTypes, rows) 151 if err != nil { 152 return err 153 } 154 row.values = colValues 155 156 // step 2: output the constraints in RDF format 157 row.blankNodeLabel = tableGuide.blankNode.generate(tableInfo, colValues) 158 159 m.outputConstraints(row, tableInfo) 160 } 161 162 return nil 163 } 164 165 // outputRow takes a row with its metadata as well as the table metadata, and 166 // spits out one or more RDF entries to the dumpMeta's dataWriter. 167 // Consider the following table "salary" 168 // person_company varchar (50) 169 // person_employee_id int 170 // salary float 171 // foreign key (person_company, person_employee_id) references person (company, employee_id) 172 173 // A row with the following values in the table 174 // Google, 100, 50.0 (salary) 175 // where Google is the person_company, 100 is the employee id, and 50.0 is the salary rate 176 // will cause the following RDF entries to be generated 177 // _:salary_1 <salary_person_company> "Google" . 178 // _:salary_1 <salary_person_employee_id> "100" . 179 // _:salary_1 <salary_person_salary> "50.0" . 180 // _:salary_1 <salary_person_company_person_employee_id> _:person_2. 181 // In the RDF output, _:salary_1 is this row's blank node label; 182 // salary_person_company, salary_person_employee_id, and salary_person_salary 183 // are the predicate names constructed by appending the column names after the table name "salary". 184 185 // The last RDF entry is a Dgraph edge created by following the foreign key reference. 186 // Its predicate name is constructed by concatenating the table name, and each column's name in 187 // alphabetical order. The object _:person_2 is the blank node label from the person table, 188 // and it's generated through a lookup in the person table using the "ref label" 189 // _:person_company_Google_employee_id_100. The mapping from the ref label 190 // _:person_company_Google_employee_id_100 to the foreign blank node _:person_2 191 // is recorded through the person table's valuesRecorder. 192 func (m *dumpMeta) outputRow(row *sqlRow, tableInfo *sqlTable) { 193 for i, colValue := range row.values { 194 colName := tableInfo.columnNames[i] 195 if !tableInfo.isForeignKey[colName] { 196 predicate := tableInfo.predNames[i] 197 m.outputPlainCell(row.blankNodeLabel, predicate, tableInfo.columnDataTypes[i], colValue) 198 } 199 } 200 } 201 202 func (m *dumpMeta) outputConstraints(row *sqlRow, tableInfo *sqlTable) { 203 for _, constraint := range tableInfo.foreignKeyConstraints { 204 if len(constraint.parts) == 0 { 205 logger.Fatalf("The constraint should have at least one part: %v", constraint) 206 } 207 208 foreignTableName := constraint.parts[0].remoteTableName 209 210 refLabel, err := row.getRefLabelFromConstraint(m.tableInfos[foreignTableName], constraint) 211 if err != nil { 212 if !quiet { 213 logger.Printf("ignoring the constraint because of error "+ 214 "when getting ref label: %+v\n", err) 215 } 216 return 217 } 218 foreignBlankNode := m.tableGuides[foreignTableName].valuesRecorder.getBlankNode(refLabel) 219 m.outputPlainCell(row.blankNodeLabel, 220 getPredFromConstraint(tableInfo.tableName, separator, constraint), uidType, 221 foreignBlankNode) 222 } 223 } 224 225 // outputPlainCell sends to the writer a RDF where the subject is the blankNode 226 // the predicate is the predName, and the object is the colValue 227 func (m *dumpMeta) outputPlainCell(blankNode string, predName string, dataType dataType, 228 colValue interface{}) { 229 // Each cell value should be stored under a predicate 230 m.buf.Reset() 231 fmt.Fprintf(&m.buf, "%s <%s> ", blankNode, predName) 232 233 switch dataType { 234 case stringType: 235 fmt.Fprintf(&m.buf, "%q .\n", colValue) 236 case uidType: 237 fmt.Fprintf(&m.buf, "%s .\n", colValue) 238 default: 239 objectVal, err := getValue(dataType, colValue) 240 if err != nil { 241 if !quiet { 242 logger.Printf("ignoring object %v because of error when getting value: %v", 243 colValue, err) 244 } 245 return 246 } 247 248 fmt.Fprintf(&m.buf, "\"%v\" .\n", objectVal) 249 } 250 251 // send the buf to writer 252 fmt.Fprintf(m.dataWriter, "%s", m.buf.String()) 253 } 254 255 // getRefLabelFromConstraint returns a ref label based on a foreign key constraint. 256 // Consider the foreign key constraint 257 // foreign key (person_company, person_employee_id) references person (company, employee_id) 258 // and a row with the following values in the table 259 // Google, 100, 50.0 (salary) 260 // where Google is the person_company, 100 is the employee id, and 50.0 is the salary rate 261 // the refLabel will use the foreign table name, foreign column names and the local row's values, 262 // yielding the value of _:person_company_Google_employee_id_100 263 func (row *sqlRow) getRefLabelFromConstraint(foreignTableInfo *sqlTable, 264 constraint *fkConstraint) (string, error) { 265 if constraint.foreignIndices == nil { 266 foreignKeyColumnNames := make(map[string]string) 267 for _, part := range constraint.parts { 268 foreignKeyColumnNames[part.columnName] = part.remoteColumnName 269 } 270 271 constraint.foreignIndices = getColumnIndices(row.tableInfo, 272 func(info *sqlTable, column string) bool { 273 _, ok := foreignKeyColumnNames[column] 274 return ok 275 }) 276 277 // replace the column names to be the foreign column names 278 for _, colIdx := range constraint.foreignIndices { 279 colIdx.name = foreignKeyColumnNames[colIdx.name] 280 } 281 } 282 283 return createLabel(&ref{ 284 allColumns: foreignTableInfo.columns, 285 refColumnIndices: constraint.foreignIndices, 286 tableName: foreignTableInfo.tableName, 287 colValues: row.values, 288 }) 289 }