github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/cmd/ctclone/ctclone.go (about) 1 // Copyright 2021 Google LLC. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // ctclone is a one-shot tool for downloading entries from a CT log. 16 package main 17 18 import ( 19 "context" 20 "encoding/json" 21 "flag" 22 "fmt" 23 "net/url" 24 "strings" 25 26 "github.com/golang/glog" 27 "github.com/google/trillian-examples/clone/internal/cloner" 28 "github.com/google/trillian-examples/clone/internal/download" 29 "github.com/google/trillian-examples/clone/logdb" 30 31 _ "github.com/go-sql-driver/mysql" 32 ) 33 34 var ( 35 logURL = flag.String("log_url", "", "Log storage root URL, e.g. https://ct.googleapis.com/rocketeer/") 36 mysqlURI = flag.String("mysql_uri", "", "URL of a MySQL database to clone the log into. The DB should contain only one log.") 37 fetchBatchSize = flag.Uint("fetch_batch_size", 32, "The number of entries to fetch from the log in each request.") 38 writeBatchSize = flag.Uint("write_batch_size", 32, "The number of leaves to write in each DB transaction.") 39 workers = flag.Uint("workers", 2, "The number of worker threads to run in parallel to fetch entries.") 40 ) 41 42 func main() { 43 flag.Parse() 44 45 if !strings.HasSuffix(*logURL, "/") { 46 glog.Exit("'log_url' must end with '/'") 47 } 48 if len(*mysqlURI) == 0 { 49 glog.Exit("Missing required parameter 'mysql_uri'") 50 } 51 lu, err := url.Parse(*logURL) 52 if err != nil { 53 glog.Exitf("log_url is invalid: %v", err) 54 } 55 56 ctx := context.Background() 57 db, err := logdb.NewDatabase(*mysqlURI) 58 if err != nil { 59 glog.Exitf("Failed to connect to database: %q", err) 60 } 61 62 // Get the latest checkpoint from the log we are cloning: we will download all the leaves this commits to. 63 fetcher := ctFetcher{download.NewHTTPFetcher(lu)} 64 targetCp, err := fetcher.latestCheckpoint() 65 if err != nil { 66 glog.Exitf("Failed to get latest checkpoint from log: %v", err) 67 } 68 69 cp := cloner.UnwrappedCheckpoint{ 70 Size: targetCp.TreeSize, 71 Hash: targetCp.RootHash, 72 Raw: targetCp.raw, 73 } 74 75 cl := cloner.New(*workers, *fetchBatchSize, *writeBatchSize, db) 76 if err := cl.CloneAndVerify(ctx, fetcher.Batch, cp); err != nil { 77 glog.Exitf("Failed to clone and verify log: %v", err) 78 } 79 } 80 81 // fetcher gets data paths. This allows impl to be swapped for tests. 82 type fetcher interface { 83 // GetData gets the data at the given path, or returns an error. 84 GetData(path string) ([]byte, error) 85 } 86 87 type ctFetcher struct { 88 f fetcher 89 } 90 91 // Batch provides a mechanism to fetch a range of leaves. 92 // Enough leaves are fetched to fully fill `leaves`, or an error is returned. 93 // This implements batch.BatchFetch. 94 func (cf ctFetcher) Batch(start uint64, leaves [][]byte) (uint64, error) { 95 // CT API gets [start, end] not [start, end). 96 last := start + uint64(len(leaves)) - 1 97 data, err := cf.f.GetData(fmt.Sprintf("ct/v1/get-entries?start=%d&end=%d", start, last)) 98 if err != nil { 99 return 0, fmt.Errorf("fetcher.GetData: %w", err) 100 } 101 var r getEntriesResponse 102 if err := json.Unmarshal(data, &r); err != nil { 103 return 0, fmt.Errorf("json.Unmarshal of %d bytes: %w", len(data), err) 104 } 105 if got, want := len(r.Leaves), len(leaves); got != want { 106 return uint64(len(r.Leaves)), fmt.Errorf("wanted %d leaves but got %d", want, got) 107 } 108 for i, l := range r.Leaves { 109 leaves[i] = l.Data 110 } 111 return uint64(len(r.Leaves)), nil 112 } 113 114 func (cf ctFetcher) latestCheckpoint() (CTCheckpointResponse, error) { 115 r := CTCheckpointResponse{} 116 cpbs, err := cf.f.GetData("ct/v1/get-sth") 117 if err != nil { 118 return r, fmt.Errorf("failed to find latest log checkpoint: %v", err) 119 } 120 if err := json.Unmarshal(cpbs, &r); err != nil { 121 return r, fmt.Errorf("failed to parse checkpoint: %v", err) 122 } 123 r.raw = cpbs 124 return r, nil 125 } 126 127 type getEntriesResponse struct { 128 Leaves []leafInput `json:"entries"` 129 } 130 131 type leafInput struct { 132 Data []byte `json:"leaf_input"` 133 } 134 135 // CTCheckpointResponse mirrors the RFC6962 STH format for `get-sth` to allow the 136 // data to be easy unmarshalled from the JSON response. 137 // TODO(mhutchinson): this was copied from ctverify. Deduplicate. 138 type CTCheckpointResponse struct { 139 TreeSize uint64 `json:"tree_size"` 140 Timestamp uint64 `json:"timestamp"` 141 RootHash []byte `json:"sha256_root_hash"` 142 Sig []byte `json:"tree_head_signature"` 143 144 raw []byte 145 }