github.com/google/syzkaller@v0.0.0-20251211124644-a066d2bc4b02/pkg/covermerger/bq_csv_reader.go (about)

     1  // Copyright 2024 syzkaller project authors. All rights reserved.
     2  // Use of this source code is governed by Apache 2 LICENSE that can be found in the LICENSE file.
     3  
     4  package covermerger
     5  
     6  import (
     7  	"compress/gzip"
     8  	"context"
     9  	"fmt"
    10  	"io"
    11  
    12  	"cloud.google.com/go/bigquery"
    13  	"cloud.google.com/go/civil"
    14  	"github.com/google/syzkaller/pkg/gcs"
    15  	"github.com/google/syzkaller/pkg/validator"
    16  	"github.com/google/uuid"
    17  )
    18  
    19  func InitNsRecords(ctx context.Context, ns, filePath, commit string, from, to civil.Date) (io.ReadCloser, error) {
    20  	if err := validator.AnyError("input validation failed",
    21  		validator.NamespaceName(ns),
    22  		validator.AnyOk(validator.EmptyStr(filePath), validator.KernelFilePath(filePath)),
    23  		validator.AnyOk(validator.EmptyStr(commit), validator.CommitHash(commit)),
    24  	); err != nil {
    25  		return nil, err
    26  	}
    27  	sessionUUID := uuid.New().String()
    28  	gsBucket := "syzbot-temp"
    29  	gsPath := fmt.Sprintf("bq-exports/%s", sessionUUID)
    30  	gsURI := "gs://" + gsBucket + "/" + gsPath + "/*.csv.gz"
    31  	client, err := bigquery.NewClient(ctx, "syzkaller")
    32  	if err != nil {
    33  		return nil, fmt.Errorf("failed to initialize bigquery client: %w", err)
    34  	}
    35  	if err := client.EnableStorageReadClient(ctx); err != nil {
    36  		return nil, fmt.Errorf("failed to client.EnableStorageReadClient: %w", err)
    37  	}
    38  	selectCommit := ""
    39  	if commit != "" {
    40  		selectCommit = fmt.Sprintf("AND\n\t\t\t\t\tkernel_commit = \"%s\"", commit)
    41  	}
    42  	q := client.Query(fmt.Sprintf(`
    43  		EXPORT DATA
    44  			OPTIONS (
    45  				uri = "%s",
    46  				format = "CSV",
    47  				overwrite = true,
    48  				header = true,
    49  				compression = "GZIP")
    50  			AS (
    51  				SELECT
    52  					kernel_repo, kernel_branch, kernel_commit, file_path, func_name, manager, sl, SUM(hit_count) as hit_count
    53  				FROM syzkaller.syzbot_coverage.`+"`%s`"+`
    54  				WHERE
    55  					TIMESTAMP_TRUNC(timestamp, DAY) >= "%s" AND
    56  					TIMESTAMP_TRUNC(timestamp, DAY) <= "%s" AND
    57  					version = 1 AND
    58  					starts_with(file_path, "%s") %s
    59  				GROUP BY file_path, func_name, manager, kernel_commit, kernel_repo, kernel_branch, sl
    60  				ORDER BY file_path, manager
    61  			);
    62  	`, gsURI, ns, from.String(), to.String(), filePath, selectCommit))
    63  	job, err := q.Run(ctx)
    64  	if err != nil {
    65  		return nil, fmt.Errorf("err during bigquery.Run: %w", err)
    66  	}
    67  	status, err := job.Wait(ctx)
    68  	if err != nil {
    69  		return nil, fmt.Errorf("err waiting for the bigquery.Job: %w", err)
    70  	}
    71  	if status.Err() != nil {
    72  		return nil, fmt.Errorf("bigquery job failed with status %w", status.Err())
    73  	}
    74  	return initGCSMultiReader(ctx, gsBucket, gsPath)
    75  }
    76  
    77  func initGCSMultiReader(ctx context.Context, bucket, path string) (io.ReadCloser, error) {
    78  	var gcsClient gcs.Client
    79  	var err error
    80  	if gcsClient, err = gcs.NewClient(ctx); err != nil {
    81  		return nil, fmt.Errorf("err creating gcs client: %w", err)
    82  	}
    83  	var gcsFiles []*gcs.Object
    84  	if gcsFiles, err = gcsClient.ListObjects(bucket + "/" + path); err != nil {
    85  		return nil, fmt.Errorf("err enumerating gcs files: %w", err)
    86  	}
    87  	paths := []string{}
    88  	for _, obj := range gcsFiles {
    89  		paths = append(paths, bucket+"/"+obj.Path)
    90  	}
    91  	return &gcsGZIPMultiReader{
    92  		gcsClient: gcsClient,
    93  		gcsFiles:  paths,
    94  	}, nil
    95  }
    96  
// gcsGZIPMultiReader streams the decompressed contents of a sequence of
// gzipped GCS objects, one after another, as a single io.ReadCloser.
type gcsGZIPMultiReader struct {
	gcsClient gcs.Client
	gcsFiles  []string // bucket-qualified paths of the files not yet fully consumed

	// Readers for the file currently being consumed; both are nil
	// between files (Read lazily opens the next one).
	curFileReader   io.ReadCloser
	curGZReadCloser io.ReadCloser
}
   104  
   105  func (mr *gcsGZIPMultiReader) Read(p []byte) (int, error) {
   106  	for len(mr.gcsFiles) > 0 {
   107  		if mr.curFileReader == nil {
   108  			var err error
   109  			if mr.curFileReader, err = mr.gcsClient.FileReader(mr.gcsFiles[0]); err != nil {
   110  				return 0, fmt.Errorf("failed to get %s reader: %w", mr.gcsFiles[0], err)
   111  			}
   112  			if mr.curGZReadCloser, err = gzip.NewReader(mr.curFileReader); err != nil {
   113  				mr.curGZReadCloser = nil // gzip.NewReader returns *Reader(nil) on corrupted header
   114  				return 0, fmt.Errorf("err calling gzip.NewReader: %w", err)
   115  			}
   116  		}
   117  		n, err := mr.curGZReadCloser.Read(p)
   118  		if err == io.EOF {
   119  			mr.gcsFiles = mr.gcsFiles[1:]
   120  			if err := mr.Close(); err != nil {
   121  				return 0, fmt.Errorf("mr.Close: %w", err)
   122  			}
   123  		}
   124  		if n > 0 || err != io.EOF {
   125  			if err == io.EOF && len(mr.gcsFiles) > 0 {
   126  				// Don't return EOF yet. More readers remain.
   127  				err = nil
   128  			}
   129  			return n, err
   130  		}
   131  	}
   132  	return 0, io.EOF
   133  }
   134  
   135  func (mr *gcsGZIPMultiReader) Close() error {
   136  	var err1, err2 error
   137  	if mr.curGZReadCloser != nil {
   138  		err1 = mr.curGZReadCloser.Close()
   139  	}
   140  	if mr.curFileReader != nil {
   141  		err2 = mr.curFileReader.Close()
   142  	}
   143  	mr.curFileReader = nil
   144  	mr.curGZReadCloser = nil
   145  	if err1 != nil {
   146  		return fmt.Errorf("mr.curGZReadCloser.Close: %w", err1)
   147  	}
   148  	if err2 != nil {
   149  		return fmt.Errorf("mr.curFileReader.Close: %w", err2)
   150  	}
   151  	return nil
   152  }