github.com/dshekhar95/sub_dgraph@v0.0.0-20230424164411-6be28e40bbf1/dgraph/cmd/migrate/dump.go (about)

     1  /*
     2   * Copyright 2022 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package migrate
    18  
    19  import (
    20  	"bufio"
    21  	"database/sql"
    22  	"fmt"
    23  	"strings"
    24  
    25  	"github.com/pkg/errors"
    26  )
    27  
// dumpMeta serves as the global knowledge oracle that stores
// all the tables' info,
// all the tables' generation guide,
// the writer to output the generated RDF entries,
// the writer to output the Dgraph schema,
// and a sqlPool to read information from MySQL
type dumpMeta struct {
	tableInfos   map[string]*sqlTable   // table metadata, looked up by table name
	tableGuides  map[string]*tableGuide // generation guides, looked up by table name
	dataWriter   *bufio.Writer          // receives the RDF entries; flushed by dumpTables
	schemaWriter *bufio.Writer          // receives the schema; flushed by dumpSchema
	sqlPool      *sql.DB                // runs the select queries that read the table rows

	buf strings.Builder // reusable buf for building strings, call buf.Reset before use
}
    43  
// sqlRow captures values in a SQL table row, as well as the metadata associated
// with the row. The dump functions allocate one sqlRow per table and overwrite
// values and blankNodeLabel for every row they read. values is indexed in step
// with tableInfo.columnNames.
type sqlRow struct {
	values         []interface{} // column values of the current row
	blankNodeLabel string        // blank node label generated for the current row
	tableInfo      *sqlTable     // metadata of the table the row was read from
}
    51  
    52  // dumpSchema generates the Dgraph schema based on m.tableGuides
    53  // and sends the schema to m.schemaWriter
    54  func (m *dumpMeta) dumpSchema() error {
    55  	for table := range m.tableGuides {
    56  		tableInfo := m.tableInfos[table]
    57  		for _, index := range createDgraphSchema(tableInfo) {
    58  			_, err := m.schemaWriter.WriteString(index)
    59  			if err != nil {
    60  				return errors.Wrapf(err, "while writing schema")
    61  			}
    62  		}
    63  	}
    64  	return m.schemaWriter.Flush()
    65  }
    66  
    67  // dumpTables goes through all the tables twice. In the first time it generates RDF entries for the
    68  // column values. In the second time, it follows the foreign key constraints in SQL tables, and
    69  // generate the corresponding Dgraph edges.
    70  func (m *dumpMeta) dumpTables() error {
    71  	for table := range m.tableInfos {
    72  		fmt.Printf("Dumping table %s\n", table)
    73  		if err := m.dumpTable(table); err != nil {
    74  			return errors.Wrapf(err, "while dumping table %s", table)
    75  		}
    76  	}
    77  
    78  	for table := range m.tableInfos {
    79  		fmt.Printf("Dumping table constraints %s\n", table)
    80  		if err := m.dumpTableConstraints(table); err != nil {
    81  			return errors.Wrapf(err, "while dumping table %s", table)
    82  		}
    83  	}
    84  
    85  	return m.dataWriter.Flush()
    86  }
    87  
    88  // dumpTable converts the cells in a SQL table into RDF entries,
    89  // and sends entries to the m.dataWriter
    90  func (m *dumpMeta) dumpTable(table string) error {
    91  	tableGuide := m.tableGuides[table]
    92  	tableInfo := m.tableInfos[table]
    93  
    94  	query := fmt.Sprintf(`select %s from %s`, strings.Join(tableInfo.columnNames, ","), table)
    95  	rows, err := m.sqlPool.Query(query)
    96  	if err != nil {
    97  		return err
    98  	}
    99  	defer rows.Close()
   100  
   101  	// populate the predNames
   102  	for _, column := range tableInfo.columnNames {
   103  		tableInfo.predNames = append(tableInfo.predNames,
   104  			predicateName(tableInfo, column))
   105  	}
   106  
   107  	row := &sqlRow{
   108  		tableInfo: tableInfo,
   109  	}
   110  
   111  	for rows.Next() {
   112  		// step 1: read the row's column values
   113  		colValues, err := getColumnValues(tableInfo.columnNames, tableInfo.columnDataTypes, rows)
   114  		if err != nil {
   115  			return err
   116  		}
   117  		row.values = colValues
   118  
   119  		// step 2: output the column values in RDF format
   120  		row.blankNodeLabel = tableGuide.blankNode.generate(tableInfo, colValues)
   121  		m.outputRow(row, tableInfo)
   122  
   123  		// step 3: record mappings to the blankNodeLabel so that future tables can look up the
   124  		// blankNodeLabel
   125  		tableGuide.valuesRecorder.record(tableInfo, colValues, row.blankNodeLabel)
   126  	}
   127  
   128  	return nil
   129  }
   130  
   131  // dumpTableConstraints reads data from a table, and then generate RDF entries
   132  // from a row to another row in a foreign table by following columns with foreign key constraints.
   133  // It then sends the generated RDF entries to the m.dataWriter
   134  func (m *dumpMeta) dumpTableConstraints(table string) error {
   135  	tableGuide := m.tableGuides[table]
   136  	tableInfo := m.tableInfos[table]
   137  
   138  	query := fmt.Sprintf(`select %s from %s`, strings.Join(tableInfo.columnNames, ","), table)
   139  	rows, err := m.sqlPool.Query(query)
   140  	if err != nil {
   141  		return err
   142  	}
   143  	defer rows.Close()
   144  
   145  	row := &sqlRow{
   146  		tableInfo: tableInfo,
   147  	}
   148  	for rows.Next() {
   149  		// step 1: read the row's column values
   150  		colValues, err := getColumnValues(tableInfo.columnNames, tableInfo.columnDataTypes, rows)
   151  		if err != nil {
   152  			return err
   153  		}
   154  		row.values = colValues
   155  
   156  		// step 2: output the constraints in RDF format
   157  		row.blankNodeLabel = tableGuide.blankNode.generate(tableInfo, colValues)
   158  
   159  		m.outputConstraints(row, tableInfo)
   160  	}
   161  
   162  	return nil
   163  }
   164  
   165  // outputRow takes a row with its metadata as well as the table metadata, and
   166  // spits out one or more RDF entries to the dumpMeta's dataWriter.
   167  // Consider the following table "salary"
   168  // person_company varchar (50)
   169  // person_employee_id int
   170  // salary float
   171  // foreign key (person_company, person_employee_id) references person (company, employee_id)
   172  
   173  // A row with the following values in the table
   174  // Google, 100, 50.0 (salary)
   175  // where Google is the person_company, 100 is the employee id, and 50.0 is the salary rate
   176  // will cause the following RDF entries to be generated
   177  // _:salary_1 <salary_person_company> "Google" .
   178  // _:salary_1 <salary_person_employee_id> "100" .
   179  // _:salary_1 <salary_person_salary> "50.0" .
   180  // _:salary_1 <salary_person_company_person_employee_id> _:person_2.
   181  // In the RDF output, _:salary_1 is this row's blank node label;
   182  // salary_person_company, salary_person_employee_id, and salary_person_salary
   183  // are the predicate names constructed by appending the column names after the table name "salary".
   184  
   185  // The last RDF entry is a Dgraph edge created by following the foreign key reference.
   186  // Its predicate name is constructed by concatenating the table name, and each column's name in
   187  // alphabetical order. The object _:person_2 is the blank node label from the person table,
   188  // and it's generated through a lookup in the person table using the "ref label"
   189  // _:person_company_Google_employee_id_100. The mapping from the ref label
   190  // _:person_company_Google_employee_id_100 to the foreign blank node _:person_2
   191  // is recorded through the person table's valuesRecorder.
   192  func (m *dumpMeta) outputRow(row *sqlRow, tableInfo *sqlTable) {
   193  	for i, colValue := range row.values {
   194  		colName := tableInfo.columnNames[i]
   195  		if !tableInfo.isForeignKey[colName] {
   196  			predicate := tableInfo.predNames[i]
   197  			m.outputPlainCell(row.blankNodeLabel, predicate, tableInfo.columnDataTypes[i], colValue)
   198  		}
   199  	}
   200  }
   201  
   202  func (m *dumpMeta) outputConstraints(row *sqlRow, tableInfo *sqlTable) {
   203  	for _, constraint := range tableInfo.foreignKeyConstraints {
   204  		if len(constraint.parts) == 0 {
   205  			logger.Fatalf("The constraint should have at least one part: %v", constraint)
   206  		}
   207  
   208  		foreignTableName := constraint.parts[0].remoteTableName
   209  
   210  		refLabel, err := row.getRefLabelFromConstraint(m.tableInfos[foreignTableName], constraint)
   211  		if err != nil {
   212  			if !quiet {
   213  				logger.Printf("ignoring the constraint because of error "+
   214  					"when getting ref label: %+v\n", err)
   215  			}
   216  			return
   217  		}
   218  		foreignBlankNode := m.tableGuides[foreignTableName].valuesRecorder.getBlankNode(refLabel)
   219  		m.outputPlainCell(row.blankNodeLabel,
   220  			getPredFromConstraint(tableInfo.tableName, separator, constraint), uidType,
   221  			foreignBlankNode)
   222  	}
   223  }
   224  
   225  // outputPlainCell sends to the writer a RDF where the subject is the blankNode
   226  // the predicate is the predName, and the object is the colValue
   227  func (m *dumpMeta) outputPlainCell(blankNode string, predName string, dataType dataType,
   228  	colValue interface{}) {
   229  	// Each cell value should be stored under a predicate
   230  	m.buf.Reset()
   231  	fmt.Fprintf(&m.buf, "%s <%s> ", blankNode, predName)
   232  
   233  	switch dataType {
   234  	case stringType:
   235  		fmt.Fprintf(&m.buf, "%q .\n", colValue)
   236  	case uidType:
   237  		fmt.Fprintf(&m.buf, "%s .\n", colValue)
   238  	default:
   239  		objectVal, err := getValue(dataType, colValue)
   240  		if err != nil {
   241  			if !quiet {
   242  				logger.Printf("ignoring object %v because of error when getting value: %v",
   243  					colValue, err)
   244  			}
   245  			return
   246  		}
   247  
   248  		fmt.Fprintf(&m.buf, "\"%v\" .\n", objectVal)
   249  	}
   250  
   251  	// send the buf to writer
   252  	fmt.Fprintf(m.dataWriter, "%s", m.buf.String())
   253  }
   254  
   255  // getRefLabelFromConstraint returns a ref label based on a foreign key constraint.
   256  // Consider the foreign key constraint
   257  // foreign key (person_company, person_employee_id) references person (company, employee_id)
   258  // and a row with the following values in the table
   259  // Google, 100, 50.0 (salary)
   260  // where Google is the person_company, 100 is the employee id, and 50.0 is the salary rate
   261  // the refLabel will use the foreign table name, foreign column names and the local row's values,
   262  // yielding the value of _:person_company_Google_employee_id_100
   263  func (row *sqlRow) getRefLabelFromConstraint(foreignTableInfo *sqlTable,
   264  	constraint *fkConstraint) (string, error) {
   265  	if constraint.foreignIndices == nil {
   266  		foreignKeyColumnNames := make(map[string]string)
   267  		for _, part := range constraint.parts {
   268  			foreignKeyColumnNames[part.columnName] = part.remoteColumnName
   269  		}
   270  
   271  		constraint.foreignIndices = getColumnIndices(row.tableInfo,
   272  			func(info *sqlTable, column string) bool {
   273  				_, ok := foreignKeyColumnNames[column]
   274  				return ok
   275  			})
   276  
   277  		// replace the column names to be the foreign column names
   278  		for _, colIdx := range constraint.foreignIndices {
   279  			colIdx.name = foreignKeyColumnNames[colIdx.name]
   280  		}
   281  	}
   282  
   283  	return createLabel(&ref{
   284  		allColumns:       foreignTableInfo.columns,
   285  		refColumnIndices: constraint.foreignIndices,
   286  		tableName:        foreignTableInfo.tableName,
   287  		colValues:        row.values,
   288  	})
   289  }