github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/cmd/ctclone/ctclone.go (about)

     1  // Copyright 2021 Google LLC. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // ctclone is a one-shot tool for downloading entries from a CT log.
    16  package main
    17  
    18  import (
    19  	"context"
    20  	"encoding/json"
    21  	"flag"
    22  	"fmt"
    23  	"net/url"
    24  	"strings"
    25  
    26  	"github.com/golang/glog"
    27  	"github.com/google/trillian-examples/clone/internal/cloner"
    28  	"github.com/google/trillian-examples/clone/internal/download"
    29  	"github.com/google/trillian-examples/clone/logdb"
    30  
    31  	_ "github.com/go-sql-driver/mysql"
    32  )
    33  
    34  var (
    35  	logURL         = flag.String("log_url", "", "Log storage root URL, e.g. https://ct.googleapis.com/rocketeer/")
    36  	mysqlURI       = flag.String("mysql_uri", "", "URL of a MySQL database to clone the log into. The DB should contain only one log.")
    37  	fetchBatchSize = flag.Uint("fetch_batch_size", 32, "The number of entries to fetch from the log in each request.")
    38  	writeBatchSize = flag.Uint("write_batch_size", 32, "The number of leaves to write in each DB transaction.")
    39  	workers        = flag.Uint("workers", 2, "The number of worker threads to run in parallel to fetch entries.")
    40  )
    41  
    42  func main() {
    43  	flag.Parse()
    44  
    45  	if !strings.HasSuffix(*logURL, "/") {
    46  		glog.Exit("'log_url' must end with '/'")
    47  	}
    48  	if len(*mysqlURI) == 0 {
    49  		glog.Exit("Missing required parameter 'mysql_uri'")
    50  	}
    51  	lu, err := url.Parse(*logURL)
    52  	if err != nil {
    53  		glog.Exitf("log_url is invalid: %v", err)
    54  	}
    55  
    56  	ctx := context.Background()
    57  	db, err := logdb.NewDatabase(*mysqlURI)
    58  	if err != nil {
    59  		glog.Exitf("Failed to connect to database: %q", err)
    60  	}
    61  
    62  	// Get the latest checkpoint from the log we are cloning: we will download all the leaves this commits to.
    63  	fetcher := ctFetcher{download.NewHTTPFetcher(lu)}
    64  	targetCp, err := fetcher.latestCheckpoint()
    65  	if err != nil {
    66  		glog.Exitf("Failed to get latest checkpoint from log: %v", err)
    67  	}
    68  
    69  	cp := cloner.UnwrappedCheckpoint{
    70  		Size: targetCp.TreeSize,
    71  		Hash: targetCp.RootHash,
    72  		Raw:  targetCp.raw,
    73  	}
    74  
    75  	cl := cloner.New(*workers, *fetchBatchSize, *writeBatchSize, db)
    76  	if err := cl.CloneAndVerify(ctx, fetcher.Batch, cp); err != nil {
    77  		glog.Exitf("Failed to clone and verify log: %v", err)
    78  	}
    79  }
    80  
    81  // fetcher gets data paths. This allows impl to be swapped for tests.
    82  type fetcher interface {
    83  	// GetData gets the data at the given path, or returns an error.
    84  	GetData(path string) ([]byte, error)
    85  }
    86  
    87  type ctFetcher struct {
    88  	f fetcher
    89  }
    90  
    91  // Batch provides a mechanism to fetch a range of leaves.
    92  // Enough leaves are fetched to fully fill `leaves`, or an error is returned.
    93  // This implements batch.BatchFetch.
    94  func (cf ctFetcher) Batch(start uint64, leaves [][]byte) (uint64, error) {
    95  	// CT API gets [start, end] not [start, end).
    96  	last := start + uint64(len(leaves)) - 1
    97  	data, err := cf.f.GetData(fmt.Sprintf("ct/v1/get-entries?start=%d&end=%d", start, last))
    98  	if err != nil {
    99  		return 0, fmt.Errorf("fetcher.GetData: %w", err)
   100  	}
   101  	var r getEntriesResponse
   102  	if err := json.Unmarshal(data, &r); err != nil {
   103  		return 0, fmt.Errorf("json.Unmarshal of %d bytes: %w", len(data), err)
   104  	}
   105  	if got, want := len(r.Leaves), len(leaves); got != want {
   106  		return uint64(len(r.Leaves)), fmt.Errorf("wanted %d leaves but got %d", want, got)
   107  	}
   108  	for i, l := range r.Leaves {
   109  		leaves[i] = l.Data
   110  	}
   111  	return uint64(len(r.Leaves)), nil
   112  }
   113  
   114  func (cf ctFetcher) latestCheckpoint() (CTCheckpointResponse, error) {
   115  	r := CTCheckpointResponse{}
   116  	cpbs, err := cf.f.GetData("ct/v1/get-sth")
   117  	if err != nil {
   118  		return r, fmt.Errorf("failed to find latest log checkpoint: %v", err)
   119  	}
   120  	if err := json.Unmarshal(cpbs, &r); err != nil {
   121  		return r, fmt.Errorf("failed to parse checkpoint: %v", err)
   122  	}
   123  	r.raw = cpbs
   124  	return r, nil
   125  }
   126  
   127  type getEntriesResponse struct {
   128  	Leaves []leafInput `json:"entries"`
   129  }
   130  
   131  type leafInput struct {
   132  	Data []byte `json:"leaf_input"`
   133  }
   134  
   135  // CTCheckpointResponse mirrors the RFC6962 STH format for `get-sth` to allow the
   136  // data to be easy unmarshalled from the JSON response.
   137  // TODO(mhutchinson): this was copied from ctverify. Deduplicate.
   138  type CTCheckpointResponse struct {
   139  	TreeSize  uint64 `json:"tree_size"`
   140  	Timestamp uint64 `json:"timestamp"`
   141  	RootHash  []byte `json:"sha256_root_hash"`
   142  	Sig       []byte `json:"tree_head_signature"`
   143  
   144  	raw []byte
   145  }