github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/cmd/serverlessclone/serverlessclone.go (about) 1 // Copyright 2021 Google LLC. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // serverlessclone is a one-shot tool for downloading entries from an 16 // HTTP(s) exposed transparency log generated by the serverless tooling. 17 package main 18 19 import ( 20 "context" 21 "flag" 22 "fmt" 23 "io" 24 "net/http" 25 "net/url" 26 "os" 27 "time" 28 29 "github.com/golang/glog" 30 "github.com/google/trillian-examples/clone/internal/cloner" 31 "github.com/google/trillian-examples/clone/logdb" 32 "github.com/transparency-dev/serverless-log/client" 33 "golang.org/x/mod/sumdb/note" 34 35 _ "github.com/go-sql-driver/mysql" 36 ) 37 38 var ( 39 logURL = flag.String("url", "", "The base URL for the log HTTP API, should end with a trailing slash") 40 vkey = flag.String("vkey", "", "The verification key for the log checkpoints") 41 origin = flag.String("origin", "", "The origin string for the log checkpoints") 42 mysqlURI = flag.String("mysql_uri", "", "URL of a MySQL database to clone the log into. The DB should contain only one log.") 43 44 writeBatchSize = flag.Uint("write_batch_size", 100, "The number of leaves to write in each DB transaction.") 45 workers = flag.Uint("workers", 50, "The number of worker threads to run in parallel to fetch entries.") 46 timeout = flag.Duration("timeout", 10*time.Second, "Maximum time to wait for http connections to complete.") 47 ) 48 49 func main() { 50 flag.Parse() 51 52 if *logURL == "" { 53 glog.Exit("Missing required parameter 'url'") 54 } 55 if *vkey == "" { 56 glog.Exit("Missing required parameter 'vkey'") 57 } 58 if *origin == "" { 59 glog.Exit("Missing required parameter 'origin'") 60 } 61 if *mysqlURI == "" { 62 glog.Exit("Missing required parameter 'mysql_uri'") 63 } 64 65 ctx := context.Background() 66 db, err := logdb.NewDatabase(*mysqlURI) 67 if err != nil { 68 glog.Exitf("Failed to connect to database: %q", err) 69 } 70 71 v, err := note.NewVerifier(*vkey) 72 if err != nil { 73 glog.Exitf("Failed to create verifier: %v", err) 74 } 75 u, err := url.Parse(*logURL) 76 if err != nil { 77 glog.Exitf("Invalid log URL %q: %v", *logURL, err) 78 } 79 f := newFetcher(u) 80 81 targetCp, rawCp, _, err := client.FetchCheckpoint(ctx, f, v, *origin) 82 if err != nil { 83 glog.Exitf("Failed to get latest checkpoint from log: %v", err) 84 } 85 glog.Infof("Target checkpoint is for tree size %d", targetCp.Size) 86 87 cp := cloner.UnwrappedCheckpoint{ 88 Size: targetCp.Size, 89 Hash: targetCp.Hash, 90 Raw: rawCp, 91 } 92 if err := clone(ctx, db, f, cp); err != nil { 93 glog.Exitf("Failed to clone: %v", err) 94 } 95 } 96 97 func clone(ctx context.Context, db *logdb.Database, f client.Fetcher, targetCp cloner.UnwrappedCheckpoint) error { 98 cl := cloner.New(*workers, 1, *writeBatchSize, db) 99 100 next, err := cl.Next() 101 if err != nil { 102 return fmt.Errorf("couldn't determine first leaf to fetch: %v", err) 103 } 104 // TODO(mhutchinson): other implementations don't have this check. Is this redundant, 105 // OR can it be moved deeper into the call stack? 106 if next >= uint64(targetCp.Size) { 107 glog.Infof("No work to do. Local tree size = %d, latest log tree size = %d", next, targetCp.Size) 108 return nil 109 } 110 111 batchFetch := func(start uint64, leaves [][]byte) (uint64, error) { 112 if len(leaves) != 1 { 113 return 0, fmt.Errorf("true batch fetching not supported") 114 } 115 leaf, err := client.GetLeaf(ctx, f, start) 116 leaves[0] = leaf 117 return 1, err 118 } 119 120 if err := cl.CloneAndVerify(ctx, batchFetch, targetCp); err != nil { 121 return fmt.Errorf("failed to clone and verify log: %v", err) 122 } 123 return nil 124 } 125 126 // newFetcher creates a Fetcher for the log at the given root location. 127 func newFetcher(root *url.URL) client.Fetcher { 128 get := getByScheme[root.Scheme] 129 if get == nil { 130 panic(fmt.Errorf("unsupported URL scheme %s", root.Scheme)) 131 } 132 133 return func(ctx context.Context, p string) ([]byte, error) { 134 u, err := root.Parse(p) 135 if err != nil { 136 return nil, err 137 } 138 return get(ctx, u) 139 } 140 } 141 142 var getByScheme = map[string]func(context.Context, *url.URL) ([]byte, error){ 143 "http": readHTTP, 144 "https": readHTTP, 145 } 146 147 func readHTTP(ctx context.Context, u *url.URL) ([]byte, error) { 148 req, err := http.NewRequest("GET", u.String(), nil) 149 if err != nil { 150 return nil, err 151 } 152 c := http.Client{ 153 Timeout: *timeout, 154 } 155 resp, err := c.Do(req.WithContext(ctx)) 156 if err != nil { 157 return nil, err 158 } 159 switch resp.StatusCode { 160 case 404: 161 glog.Infof("Not found: %q", u.String()) 162 return nil, os.ErrNotExist 163 case 200: 164 break 165 default: 166 return nil, fmt.Errorf("unexpected http status %q", resp.Status) 167 } 168 defer func() { 169 if err := resp.Body.Close(); err != nil { 170 glog.Errorf("resp.Body.Close(): %v", err) 171 } 172 }() 173 return io.ReadAll(resp.Body) 174 }