github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/covermerger/bq_csv_reader.go (about) 1 // Copyright 2024 syzkaller project authors. All rights reserved. 2 // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file. 3 4 package covermerger 5 6 import ( 7 "compress/gzip" 8 "context" 9 "fmt" 10 "io" 11 12 "cloud.google.com/go/bigquery" 13 "cloud.google.com/go/civil" 14 "github.com/google/syzkaller/pkg/gcs" 15 "github.com/google/syzkaller/pkg/validator" 16 "github.com/google/uuid" 17 ) 18 19 func InitNsRecords(ctx context.Context, ns, filePath, commit string, from, to civil.Date) (io.ReadCloser, error) { 20 if err := validator.AnyError("input validation failed", 21 validator.NamespaceName(ns), 22 validator.AnyOk(validator.EmptyStr(filePath), validator.KernelFilePath(filePath)), 23 validator.AnyOk(validator.EmptyStr(commit), validator.CommitHash(commit)), 24 ); err != nil { 25 return nil, err 26 } 27 sessionUUID := uuid.New().String() 28 gsBucket := "syzbot-temp" 29 gsPath := fmt.Sprintf("bq-exports/%s", sessionUUID) 30 gsURI := "gs://" + gsBucket + "/" + gsPath + "/*.csv.gz" 31 client, err := bigquery.NewClient(ctx, "syzkaller") 32 if err != nil { 33 return nil, fmt.Errorf("failed to initialize bigquery client: %w", err) 34 } 35 if err := client.EnableStorageReadClient(ctx); err != nil { 36 return nil, fmt.Errorf("failed to client.EnableStorageReadClient: %w", err) 37 } 38 selectCommit := "" 39 if commit != "" { 40 selectCommit = fmt.Sprintf("AND\n\t\t\t\t\tkernel_commit = \"%s\"", commit) 41 } 42 q := client.Query(fmt.Sprintf(` 43 EXPORT DATA 44 OPTIONS ( 45 uri = "%s", 46 format = "CSV", 47 overwrite = true, 48 header = true, 49 compression = "GZIP") 50 AS ( 51 SELECT 52 kernel_repo, kernel_branch, kernel_commit, file_path, func_name, manager, sl, SUM(hit_count) as hit_count 53 FROM syzkaller.syzbot_coverage.`+"`%s`"+` 54 WHERE 55 TIMESTAMP_TRUNC(timestamp, DAY) >= "%s" AND 56 TIMESTAMP_TRUNC(timestamp, DAY) <= "%s" AND 57 version = 1 AND 58 starts_with(file_path, "%s") %s 59 GROUP BY file_path, func_name, manager, kernel_commit, kernel_repo, kernel_branch, sl 60 ORDER BY file_path, manager 61 ); 62 `, gsURI, ns, from.String(), to.String(), filePath, selectCommit)) 63 job, err := q.Run(ctx) 64 if err != nil { 65 return nil, fmt.Errorf("err during bigquery.Run: %w", err) 66 } 67 status, err := job.Wait(ctx) 68 if err != nil { 69 return nil, fmt.Errorf("err waiting for the bigquery.Job: %w", err) 70 } 71 if status.Err() != nil { 72 return nil, fmt.Errorf("bigquery job failed with status %w", status.Err()) 73 } 74 return initGCSMultiReader(ctx, gsBucket, gsPath) 75 } 76 77 func initGCSMultiReader(ctx context.Context, bucket, path string) (io.ReadCloser, error) { 78 var gcsClient gcs.Client 79 var err error 80 if gcsClient, err = gcs.NewClient(ctx); err != nil { 81 return nil, fmt.Errorf("err creating gcs client: %w", err) 82 } 83 var gcsFiles []*gcs.Object 84 if gcsFiles, err = gcsClient.ListObjects(bucket + "/" + path); err != nil { 85 return nil, fmt.Errorf("err enumerating gcs files: %w", err) 86 } 87 paths := []string{} 88 for _, obj := range gcsFiles { 89 paths = append(paths, bucket+"/"+obj.Path) 90 } 91 return &gcsGZIPMultiReader{ 92 gcsClient: gcsClient, 93 gcsFiles: paths, 94 }, nil 95 } 96 97 type gcsGZIPMultiReader struct { 98 gcsClient gcs.Client 99 gcsFiles []string 100 101 curFileReader io.ReadCloser 102 curGZReadCloser io.ReadCloser 103 } 104 105 func (mr *gcsGZIPMultiReader) Read(p []byte) (int, error) { 106 for len(mr.gcsFiles) > 0 { 107 if mr.curFileReader == nil { 108 var err error 109 if mr.curFileReader, err = mr.gcsClient.FileReader(mr.gcsFiles[0]); err != nil { 110 return 0, fmt.Errorf("failed to get %s reader: %w", mr.gcsFiles[0], err) 111 } 112 if mr.curGZReadCloser, err = gzip.NewReader(mr.curFileReader); err != nil { 113 mr.curGZReadCloser = nil // gzip.NewReader returns *Reader(nil) on corrupted header 114 return 0, fmt.Errorf("err calling gzip.NewReader: %w", err) 115 } 116 } 117 n, err := mr.curGZReadCloser.Read(p) 118 if err == io.EOF { 119 mr.gcsFiles = mr.gcsFiles[1:] 120 if err := mr.Close(); err != nil { 121 return 0, fmt.Errorf("mr.Close: %w", err) 122 } 123 } 124 if n > 0 || err != io.EOF { 125 if err == io.EOF && len(mr.gcsFiles) > 0 { 126 // Don't return EOF yet. More readers remain. 127 err = nil 128 } 129 return n, err 130 } 131 } 132 return 0, io.EOF 133 } 134 135 func (mr *gcsGZIPMultiReader) Close() error { 136 var err1, err2 error 137 if mr.curGZReadCloser != nil { 138 err1 = mr.curGZReadCloser.Close() 139 } 140 if mr.curFileReader != nil { 141 err2 = mr.curFileReader.Close() 142 } 143 mr.curFileReader = nil 144 mr.curGZReadCloser = nil 145 if err1 != nil { 146 return fmt.Errorf("mr.curGZReadCloser.Close: %w", err1) 147 } 148 if err2 != nil { 149 return fmt.Errorf("mr.curFileReader.Close: %w", err2) 150 } 151 return nil 152 }