github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/ccl/importccl/read_import_avro.go

// Copyright 2019 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
//     https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package importccl

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"io"
	"unicode/utf8"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/lex"
	"github.com/cockroachdb/cockroach/pkg/sql/row"
	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/sql/types"
	"github.com/cockroachdb/cockroach/pkg/storage/cloud"
	"github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
	"github.com/linkedin/goavro"
)

// nativeToDatum converts a Go native type (interface{} as
// returned by the goavro library) to a datum of the appropriate type.
//
// While the Avro specification is fairly broad, and supports arbitrarily
// complex data types, this method concerns itself only with the primitive
// avro types, which include:
//   null, boolean, int (32), long (64), float (32), double (64),
//   bytes, string, and arrays of the above.
//
// An Avro record is, essentially, a key->value mapping from field name to
// field value. A field->value mapping may be represented directly (i.e. the
// interface{} passed in will have the corresponding Go primitive type):
//   user_id:123 -- that is, the interface{} type will be int, and its value is 123.
//
// Or, we could see field_name:null, if the field is nullable and is null.
//
// Or, we could see e.g. user_id:{"int":123}, if the field called user_id can
// be either null or an int and the value of the field is 123. The value in
// this case is another interface{} which should be a map[string]interface{},
// where the key is a primitive Avro type name ("string", "long", etc).
func nativeToDatum(
	x interface{}, targetT *types.T, avroT []string, evalCtx *tree.EvalContext,
) (tree.Datum, error) {
	var d tree.Datum

	switch v := x.(type) {
	case nil:
		// Immediately return DNull, and let the target
		// table schema verify whether nulls are allowed.
		return tree.DNull, nil
	case bool:
		if v {
			d = tree.DBoolTrue
		} else {
			d = tree.DBoolFalse
		}
	case int:
		d = tree.NewDInt(tree.DInt(v))
	case int32:
		d = tree.NewDInt(tree.DInt(v))
	case int64:
		d = tree.NewDInt(tree.DInt(v))
	case float32:
		d = tree.NewDFloat(tree.DFloat(v))
	case float64:
		d = tree.NewDFloat(tree.DFloat(v))
	case []byte:
		if targetT.Identical(types.Bytes) {
			d = tree.NewDBytes(tree.DBytes(v))
		} else {
			// []byte arrays are hard. Sometimes we want []byte, sometimes
			// we want a StringFamily type. So, instead of creating a DBytes
			// datum, parse this data to "cast" it to our expected type.
			return sqlbase.ParseDatumStringAs(targetT, string(v), evalCtx)
		}
	case string:
		// We allow strings to be specified for any column, as
		// long as we can convert the string value to the target type.
		return sqlbase.ParseDatumStringAs(targetT, v, evalCtx)
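	// Illustrative note (editorial addition; the value below is hypothetical):
	// a field declared with the avro union type ["null", "int"] arrives here as
	//   map[string]interface{}{"int": int32(123)}
	// and the case below probes that map with each candidate avro type name
	// from avroT, recursing on the wrapped value.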
	case map[string]interface{}:
		for _, aT := range avroT {
			// The value passed in is an avro union value: a map keyed
			// by avro type name. Extract the possible primitive values
			// and attempt to convert them to our target type.
			if val, ok := v[aT]; ok {
				return nativeToDatum(val, targetT, avroT, evalCtx)
			}
		}
	case []interface{}:
		// Verify target type is an array we know how to handle.
		if targetT.ArrayContents() == nil {
			return nil, fmt.Errorf("cannot convert array to non-array type %s", targetT)
		}
		eltAvroT, ok := familyToAvroT[targetT.ArrayContents().Family()]
		if !ok {
			return nil, fmt.Errorf("cannot convert avro array element to %s", targetT.ArrayContents())
		}

		// Convert each element.
		arr := tree.NewDArray(targetT.ArrayContents())
		for _, elt := range v {
			eltDatum, err := nativeToDatum(elt, targetT.ArrayContents(), eltAvroT, evalCtx)
			if err == nil {
				err = arr.Append(eltDatum)
			}
			if err != nil {
				return nil, err
			}
		}
		d = arr
	}

	if d == nil {
		return nil, fmt.Errorf("cannot handle type %T when converting to %s", x, targetT)
	}

	if !targetT.Equivalent(d.ResolvedType()) {
		return nil, fmt.Errorf("cannot convert type %s to %s", d.ResolvedType(), targetT)
	}

	return d, nil
}

// A mapping from supported types.Family to the list of avro
// type names that can be used to construct our target type.
var familyToAvroT = map[types.Family][]string{
	// Primitive avro types.
	types.BoolFamily:   {"bool", "boolean", "string"},
	types.IntFamily:    {"int", "long", "string"},
	types.FloatFamily:  {"float", "double", "string"},
	types.StringFamily: {"string", "bytes"},
	types.BytesFamily:  {"bytes", "string"},

	// Arrays can be specified as avro array type, or we can try parsing string.
	types.ArrayFamily: {"array", "string"},

	// Families we can try to convert using string conversion.
	types.UuidFamily:           {"string"},
	types.DateFamily:           {"string"},
	types.TimeFamily:           {"string"},
	types.IntervalFamily:       {"string"},
	types.TimestampTZFamily:    {"string"},
	types.TimestampFamily:      {"string"},
	types.CollatedStringFamily: {"string"},
	types.INetFamily:           {"string"},
	types.JsonFamily:           {"string"},
	types.BitFamily:            {"string"},
	types.DecimalFamily:        {"string"},
}

// avroConsumer implements importRowConsumer interface.
type avroConsumer struct {
	importCtx      *parallelImportContext
	fieldNameToIdx map[string]int
	strict         bool
}

// Converts an avro record to datums as expected by DatumRowConverter.
func (a *avroConsumer) convertNative(x interface{}, conv *row.DatumRowConverter) error {
	record, ok := x.(map[string]interface{})
	if !ok {
		return fmt.Errorf("unexpected native type; expected map[string]interface{} found %T instead", x)
	}

	for f, v := range record {
		field := lex.NormalizeName(f)
		idx, ok := a.fieldNameToIdx[field]
		if !ok {
			if a.strict {
				return fmt.Errorf("could not find column for record field %s", field)
			}
			continue
		}

		typ := conv.VisibleColTypes[idx]
		avroT, ok := familyToAvroT[typ.Family()]
		if !ok {
			return fmt.Errorf("cannot convert avro value %v to col %s", v, conv.VisibleCols[idx].Type.Name())
		}

		datum, err := nativeToDatum(v, typ, avroT, conv.EvalCtx)
		if err != nil {
			return err
		}
		conv.Datums[idx] = datum
	}
	return nil
}
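
// exampleAvroNativeRecord is an editorial, hypothetical sketch (not part of
// the original import code) of the native value goavro hands to convertNative:
// a map from avro field name to either a plain Go value, a union wrapper keyed
// by the avro type name, or nil for a null field. Field names are normalized
// with lex.NormalizeName before the column lookup above.
func exampleAvroNativeRecord() map[string]interface{} {
	return map[string]interface{}{
		"user_id": int64(42),                                   // plain "long" value
		"email":   map[string]interface{}{"string": "x@y.com"}, // union ["null","string"]
		"note":    nil,                                         // nullable field left null
	}
}
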
// FillDatums implements importRowConsumer interface.
func (a *avroConsumer) FillDatums(
	native interface{}, rowIndex int64, conv *row.DatumRowConverter,
) error {
	if err := a.convertNative(native, conv); err != nil {
		return err
	}

	// Set any nil datums to DNull (in case the native
	// record didn't have the value set at all).
	for i := range conv.Datums {
		if _, isTargetCol := conv.IsTargetCol[i]; isTargetCol && conv.Datums[i] == nil {
			if a.strict {
				return fmt.Errorf("field %s was not set in the avro import", conv.VisibleCols[i].Name)
			}
			conv.Datums[i] = tree.DNull
		}
	}
	return nil
}

var _ importRowConsumer = &avroConsumer{}

// An OCF (object container file) input scanner.
type ocfStream struct {
	ocf      *goavro.OCFReader
	progress func() float32
	err      error
}

var _ importRowProducer = &ocfStream{}

// Progress implements importRowProducer interface.
func (o *ocfStream) Progress() float32 {
	if o.progress != nil {
		return o.progress()
	}
	return 0
}

// Scan implements importRowProducer interface.
func (o *ocfStream) Scan() bool {
	return o.ocf.Scan()
}

// Err implements importRowProducer interface.
func (o *ocfStream) Err() error {
	return o.err
}

// Row implements importRowProducer interface.
func (o *ocfStream) Row() (interface{}, error) {
	return o.ocf.Read()
}

// Skip implements importRowProducer interface.
func (o *ocfStream) Skip() error {
	_, o.err = o.ocf.Read()
	return o.err
}

// A scanner over a file containing avro records in json or binary format.
type avroRecordStream struct {
	importCtx  *parallelImportContext
	opts       *roachpb.AvroOptions
	input      *fileReader
	codec      *goavro.Codec
	row        interface{} // Row to return
	buf        []byte      // Buffered data from input. See note in fill() method.
	eof        bool        // Input eof reached
	err        error       // Error, other than io.EOF
	trimLeft   bool        // Trim record separator at the start of the buffer.
	maxBufSize int         // Error if buf exceeds this threshold
	minBufSize int         // Issue additional reads if buffer below this threshold
	readSize   int         // Read that many bytes at a time.
}

var _ importRowProducer = &avroRecordStream{}

func (r *avroRecordStream) Progress() float32 {
	return r.input.ReadFraction()
}

func (r *avroRecordStream) trimRecordSeparator() bool {
	if r.opts.RecordSeparator == 0 {
		return true
	}

	if len(r.buf) > 0 {
		c, n := utf8.DecodeRune(r.buf)
		if n > 0 && c == r.opts.RecordSeparator {
			r.buf = r.buf[n:]
			return true
		}
	}
	return false
}

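// Illustrative sketch (editorial addition; the input below is hypothetical):
// with the textual records format, a record separator of '\n', and a schema
// whose only field is a non-union "long" named "id", the stream consumed by
// fill() and readNative() below would look like
//
//	{"id": 1}
//	{"id": 2}
//
// i.e. individually encoded records, with the separator trimmed off between
// records by trimRecordSeparator above.
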
func (r *avroRecordStream) fill(sz int) {
	if r.eof || r.err != nil {
		return
	}

	// NB: We use bytes.Buffer for writing into our internal buf, but we cannot
	// use bytes.Buffer for reading. The reason is that bytes.Buffer tries
	// to be efficient in its memory management. In particular, it can reuse
	// underlying memory if the buffer becomes empty (buf = buf[:0]). This is
	// problematic for us because the avro stream sends interface{} objects
	// to the consumer workers. Those interface objects may (infrequently)
	// reference the underlying byte array from which those interface objects
	// were constructed (e.g. if we are decoding the avro bytes data type, we
	// may actually return []byte as an interface{} referencing the underlying
	// buffer). To avoid this unpleasant situation, we never reset the head of
	// our buffer.
	sink := bytes.NewBuffer(r.buf)
	_, r.err = io.CopyN(sink, r.input, int64(sz))
	r.buf = sink.Bytes()

	if r.err == io.EOF {
		r.eof = true
		r.err = nil
	}
}

// Scan implements importRowProducer interface.
func (r *avroRecordStream) Scan() bool {
	if r.row != nil {
		panic("must call Row() or Skip() before calling Scan()")
	}

	r.readNative()
	return r.err == nil && (!r.eof || r.row != nil)
}

// Err implements importRowProducer interface.
func (r *avroRecordStream) Err() error {
	return r.err
}

func (r *avroRecordStream) decode() (interface{}, []byte, error) {
	if r.opts.Format == roachpb.AvroOptions_BIN_RECORDS {
		return r.codec.NativeFromBinary(r.buf)
	}
	return r.codec.NativeFromTextual(r.buf)
}

func (r *avroRecordStream) readNative() {
	var remaining []byte
	var decodeErr error
	r.row = nil

	canReadMoreData := func() bool {
		return !r.eof && len(r.buf) < r.maxBufSize
	}

	for sz := r.readSize; r.row == nil && (len(r.buf) > 0 || canReadMoreData()); sz *= 2 {
		r.fill(sz)

		if r.trimLeft {
			r.trimLeft = !r.trimRecordSeparator()
		}

		if len(r.buf) > 0 {
			r.row, remaining, decodeErr = r.decode()
		}
	}

	if decodeErr != nil {
		r.err = decodeErr
		return
	}

	r.buf = remaining
	r.trimLeft = !r.trimRecordSeparator()
}

// Skip implements importRowProducer interface.
func (r *avroRecordStream) Skip() error {
	r.row = nil
	return nil
}

// Row implements importRowProducer interface.
func (r *avroRecordStream) Row() (interface{}, error) {
	res := r.row
	r.row = nil
	return res, nil
}

func newImportAvroPipeline(
	avro *avroInputReader, input *fileReader,
) (importRowProducer, importRowConsumer, error) {
	fieldIdxByName := make(map[string]int)
	for idx, col := range avro.importContext.tableDesc.VisibleColumns() {
		fieldIdxByName[col.Name] = idx
	}

	consumer := &avroConsumer{
		importCtx:      avro.importContext,
		fieldNameToIdx: fieldIdxByName,
		strict:         avro.opts.StrictMode,
	}

	if avro.opts.Format == roachpb.AvroOptions_OCF {
		ocf, err := goavro.NewOCFReader(bufio.NewReaderSize(input, 64<<10))
		if err != nil {
			return nil, nil, err
		}
		producer := &ocfStream{
			ocf:      ocf,
			progress: func() float32 { return input.ReadFraction() },
		}
		return producer, consumer, nil
	}

	codec, err := goavro.NewCodec(avro.opts.SchemaJSON)
	if err != nil {
		return nil, nil, err
	}

	producer := &avroRecordStream{
		importCtx: avro.importContext,
		opts:      &avro.opts,
		input:     input,
		codec:     codec,
		// We don't really know how large the records are, but if we have
		// "too little" data in our buffer, we would probably not be able to
		// parse an avro record. So, if the available bytes are below this
		// threshold, be proactive and read more data.
		minBufSize: 512,
		maxBufSize: 4 << 20, // bail out if we can't parse a 4MB record.
		readSize:   4 << 10, // Just like bufio.
	}

	if int(avro.opts.MaxRecordSize) > producer.maxBufSize {
		producer.maxBufSize = int(avro.opts.MaxRecordSize)
	}

	return producer, consumer, nil
}
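
// exampleRecordStreamOptions is an editorial, hypothetical sketch (not part
// of the original import code) of AvroOptions that would route an import
// through the avroRecordStream branch above rather than the OCF branch: any
// Format other than OCF uses the record stream, with BIN_RECORDS decoded via
// NativeFromBinary and other record formats via NativeFromTextual (see
// decode() above). The concrete values are assumptions for illustration only.
func exampleRecordStreamOptions(schemaJSON string) roachpb.AvroOptions {
	return roachpb.AvroOptions{
		Format:        roachpb.AvroOptions_BIN_RECORDS, // raw binary-encoded records, no OCF wrapper
		SchemaJSON:    schemaJSON,                      // writer schema is required for raw records
		MaxRecordSize: 8 << 20,                         // raise the default 4MB parse limit set above
		StrictMode:    false,                           // tolerate record fields without matching columns
	}
}
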
type avroInputReader struct {
	importContext *parallelImportContext
	opts          roachpb.AvroOptions
}

var _ inputConverter = &avroInputReader{}

func newAvroInputReader(
	kvCh chan row.KVBatch,
	tableDesc *sqlbase.TableDescriptor,
	avroOpts roachpb.AvroOptions,
	walltime int64,
	parallelism int,
	evalCtx *tree.EvalContext,
) (*avroInputReader, error) {

	return &avroInputReader{
		importContext: &parallelImportContext{
			walltime:   walltime,
			numWorkers: parallelism,
			evalCtx:    evalCtx,
			tableDesc:  tableDesc,
			kvCh:       kvCh,
		},
		opts: avroOpts,
	}, nil
}

func (a *avroInputReader) start(group ctxgroup.Group) {}

func (a *avroInputReader) readFiles(
	ctx context.Context,
	dataFiles map[int32]string,
	resumePos map[int32]int64,
	format roachpb.IOFileFormat,
	makeExternalStorage cloud.ExternalStorageFactory,
) error {
	return readInputFiles(ctx, dataFiles, resumePos, format, a.readFile, makeExternalStorage)
}

func (a *avroInputReader) readFile(
	ctx context.Context, input *fileReader, inputIdx int32, resumePos int64, rejected chan string,
) error {
	producer, consumer, err := newImportAvroPipeline(a, input)
	if err != nil {
		return err
	}

	fileCtx := &importFileContext{
		source:   inputIdx,
		skip:     resumePos,
		rejected: rejected,
	}
	return runParallelImport(ctx, a.importContext, fileCtx, producer, consumer)
}
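
// Editorial summary (added commentary, derived from the code above): the
// per-file avro import pipeline is
//
//	fileReader -> importRowProducer (ocfStream or avroRecordStream)
//	           -> native record (interface{})
//	           -> importRowConsumer (avroConsumer.FillDatums)
//	           -> row.DatumRowConverter -> row.KVBatch on importContext.kvCh
//
// newImportAvroPipeline selects the producer based on AvroOptions.Format,
// and runParallelImport drives the producer/consumer pair, with parallelism
// controlled by importContext.numWorkers.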