github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/cmd/serverlessclone/serverlessclone.go (about)

     1  // Copyright 2021 Google LLC. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // serverlessclone is a one-shot tool for downloading entries from an
    16  // HTTP(s) exposed transparency log generated by the serverless tooling.
    17  package main
    18  
    19  import (
    20  	"context"
    21  	"flag"
    22  	"fmt"
    23  	"io"
    24  	"net/http"
    25  	"net/url"
    26  	"os"
    27  	"time"
    28  
    29  	"github.com/golang/glog"
    30  	"github.com/google/trillian-examples/clone/internal/cloner"
    31  	"github.com/google/trillian-examples/clone/logdb"
    32  	"github.com/transparency-dev/serverless-log/client"
    33  	"golang.org/x/mod/sumdb/note"
    34  
    35  	_ "github.com/go-sql-driver/mysql"
    36  )
    37  
    38  var (
    39  	logURL   = flag.String("url", "", "The base URL for the log HTTP API, should end with a trailing slash")
    40  	vkey     = flag.String("vkey", "", "The verification key for the log checkpoints")
    41  	origin   = flag.String("origin", "", "The origin string for the log checkpoints")
    42  	mysqlURI = flag.String("mysql_uri", "", "URL of a MySQL database to clone the log into. The DB should contain only one log.")
    43  
    44  	writeBatchSize = flag.Uint("write_batch_size", 100, "The number of leaves to write in each DB transaction.")
    45  	workers        = flag.Uint("workers", 50, "The number of worker threads to run in parallel to fetch entries.")
    46  	timeout        = flag.Duration("timeout", 10*time.Second, "Maximum time to wait for http connections to complete.")
    47  )
    48  
    49  func main() {
    50  	flag.Parse()
    51  
    52  	if *logURL == "" {
    53  		glog.Exit("Missing required parameter 'url'")
    54  	}
    55  	if *vkey == "" {
    56  		glog.Exit("Missing required parameter 'vkey'")
    57  	}
    58  	if *origin == "" {
    59  		glog.Exit("Missing required parameter 'origin'")
    60  	}
    61  	if *mysqlURI == "" {
    62  		glog.Exit("Missing required parameter 'mysql_uri'")
    63  	}
    64  
    65  	ctx := context.Background()
    66  	db, err := logdb.NewDatabase(*mysqlURI)
    67  	if err != nil {
    68  		glog.Exitf("Failed to connect to database: %q", err)
    69  	}
    70  
    71  	v, err := note.NewVerifier(*vkey)
    72  	if err != nil {
    73  		glog.Exitf("Failed to create verifier: %v", err)
    74  	}
    75  	u, err := url.Parse(*logURL)
    76  	if err != nil {
    77  		glog.Exitf("Invalid log URL %q: %v", *logURL, err)
    78  	}
    79  	f := newFetcher(u)
    80  
    81  	targetCp, rawCp, _, err := client.FetchCheckpoint(ctx, f, v, *origin)
    82  	if err != nil {
    83  		glog.Exitf("Failed to get latest checkpoint from log: %v", err)
    84  	}
    85  	glog.Infof("Target checkpoint is for tree size %d", targetCp.Size)
    86  
    87  	cp := cloner.UnwrappedCheckpoint{
    88  		Size: targetCp.Size,
    89  		Hash: targetCp.Hash,
    90  		Raw:  rawCp,
    91  	}
    92  	if err := clone(ctx, db, f, cp); err != nil {
    93  		glog.Exitf("Failed to clone: %v", err)
    94  	}
    95  }
    96  
    97  func clone(ctx context.Context, db *logdb.Database, f client.Fetcher, targetCp cloner.UnwrappedCheckpoint) error {
    98  	cl := cloner.New(*workers, 1, *writeBatchSize, db)
    99  
   100  	next, err := cl.Next()
   101  	if err != nil {
   102  		return fmt.Errorf("couldn't determine first leaf to fetch: %v", err)
   103  	}
   104  	// TODO(mhutchinson): other implementations don't have this check. Is this redundant,
   105  	// OR can it be moved deeper into the call stack?
   106  	if next >= uint64(targetCp.Size) {
   107  		glog.Infof("No work to do. Local tree size = %d, latest log tree size = %d", next, targetCp.Size)
   108  		return nil
   109  	}
   110  
   111  	batchFetch := func(start uint64, leaves [][]byte) (uint64, error) {
   112  		if len(leaves) != 1 {
   113  			return 0, fmt.Errorf("true batch fetching not supported")
   114  		}
   115  		leaf, err := client.GetLeaf(ctx, f, start)
   116  		leaves[0] = leaf
   117  		return 1, err
   118  	}
   119  
   120  	if err := cl.CloneAndVerify(ctx, batchFetch, targetCp); err != nil {
   121  		return fmt.Errorf("failed to clone and verify log: %v", err)
   122  	}
   123  	return nil
   124  }
   125  
   126  // newFetcher creates a Fetcher for the log at the given root location.
   127  func newFetcher(root *url.URL) client.Fetcher {
   128  	get := getByScheme[root.Scheme]
   129  	if get == nil {
   130  		panic(fmt.Errorf("unsupported URL scheme %s", root.Scheme))
   131  	}
   132  
   133  	return func(ctx context.Context, p string) ([]byte, error) {
   134  		u, err := root.Parse(p)
   135  		if err != nil {
   136  			return nil, err
   137  		}
   138  		return get(ctx, u)
   139  	}
   140  }
   141  
   142  var getByScheme = map[string]func(context.Context, *url.URL) ([]byte, error){
   143  	"http":  readHTTP,
   144  	"https": readHTTP,
   145  }
   146  
   147  func readHTTP(ctx context.Context, u *url.URL) ([]byte, error) {
   148  	req, err := http.NewRequest("GET", u.String(), nil)
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  	c := http.Client{
   153  		Timeout: *timeout,
   154  	}
   155  	resp, err := c.Do(req.WithContext(ctx))
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  	switch resp.StatusCode {
   160  	case 404:
   161  		glog.Infof("Not found: %q", u.String())
   162  		return nil, os.ErrNotExist
   163  	case 200:
   164  		break
   165  	default:
   166  		return nil, fmt.Errorf("unexpected http status %q", resp.Status)
   167  	}
   168  	defer func() {
   169  		if err := resp.Body.Close(); err != nil {
   170  			glog.Errorf("resp.Body.Close(): %v", err)
   171  		}
   172  	}()
   173  	return io.ReadAll(resp.Body)
   174  }