github.com/bartle-stripe/trillian@v1.2.1/storage/cloudspanner/tree_storage.go (about) 1 // Copyright 2018 Google Inc. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package cloudspanner 16 17 import ( 18 "bytes" 19 "context" 20 "errors" 21 "fmt" 22 "sync" 23 "time" 24 25 "cloud.google.com/go/spanner" 26 "github.com/golang/glog" 27 "github.com/golang/protobuf/proto" 28 "github.com/google/trillian" 29 "github.com/google/trillian/storage" 30 "github.com/google/trillian/storage/cache" 31 "github.com/google/trillian/storage/cloudspanner/spannerpb" 32 "github.com/google/trillian/storage/storagepb" 33 "google.golang.org/grpc/codes" 34 "google.golang.org/grpc/status" 35 ) 36 37 var ( 38 // ErrNotFound is returned when a read/lookup fails because there was no such 39 // item. 40 ErrNotFound = status.Errorf(codes.NotFound, "not found") 41 42 // ErrNotImplemented is returned by any interface methods which have not been 43 // implemented yet. 44 ErrNotImplemented = errors.New("not implemented") 45 46 // ErrTransactionClosed is returned by interface methods when an operation is 47 // attempted on a transaction whose Commit or Rollback methods have 48 // previously been called. 49 ErrTransactionClosed = errors.New("transaction is closed") 50 51 // ErrWrongTXType is returned when, somehow, a write operation is attempted 52 // with a read-only transaction. This should not even be possible. 53 ErrWrongTXType = errors.New("mutating method called on read-only transaction") 54 ) 55 56 const ( 57 subtreeTbl = "SubtreeData" 58 colBucket = "Bucket" 59 colSubtree = "Subtree" 60 colSubtreeID = "SubtreeID" 61 colTreeID = "TreeID" 62 colRevision = "Revision" 63 ) 64 65 // treeStorage provides a shared base for the concrete CloudSpanner-backed 66 // implementation of the Trillian storage.LogStorage and storage.MapStorage 67 // interfaces. 68 type treeStorage struct { 69 admin storage.AdminStorage 70 opts TreeStorageOptions 71 client *spanner.Client 72 } 73 74 // TreeStorageOptions holds various levers for configuring the tree storage instance. 75 type TreeStorageOptions struct { 76 // ReadOnlyStaleness controls how far in the past a read-only snapshot 77 // transaction will read. 78 // This is intended to allow Spanner to use local replicas for read requests 79 // to help with performance. 80 // See https://cloud.google.com/spanner/docs/timestamp-bounds for more details. 81 ReadOnlyStaleness time.Duration 82 } 83 84 func newTreeStorageWithOpts(client *spanner.Client, opts TreeStorageOptions) *treeStorage { 85 return &treeStorage{client: client, admin: nil, opts: opts} 86 } 87 88 type spanRead interface { 89 Query(context.Context, spanner.Statement) *spanner.RowIterator 90 Read(ctx context.Context, table string, keys spanner.KeySet, columns []string) *spanner.RowIterator 91 ReadUsingIndex(ctx context.Context, table, index string, keys spanner.KeySet, columns []string) *spanner.RowIterator 92 ReadRow(ctx context.Context, table string, key spanner.Key, columns []string) (*spanner.Row, error) 93 ReadWithOptions(ctx context.Context, table string, keys spanner.KeySet, columns []string, opts *spanner.ReadOptions) (ri *spanner.RowIterator) 94 } 95 96 // latestSTH reads and returns the newest STH. 97 func (t *treeStorage) latestSTH(ctx context.Context, stx spanRead, treeID int64) (*spannerpb.TreeHead, error) { 98 query := spanner.NewStatement( 99 "SELECT t.TreeID, t.TimestampNanos, t.TreeSize, t.RootHash, t.RootSignature, t.TreeRevision, t.TreeMetadata FROM TreeHeads t" + 100 " WHERE t.TreeID = @tree_id" + 101 " ORDER BY t.TreeRevision DESC " + 102 " LIMIT 1") 103 query.Params["tree_id"] = treeID 104 105 var th *spannerpb.TreeHead 106 rows := stx.Query(ctx, query) 107 defer rows.Stop() 108 err := rows.Do(func(r *spanner.Row) error { 109 tth := &spannerpb.TreeHead{} 110 if err := r.Columns(&tth.TreeId, &tth.TsNanos, &tth.TreeSize, &tth.RootHash, &tth.Signature, &tth.TreeRevision, &tth.Metadata); err != nil { 111 return err 112 } 113 114 th = tth 115 return nil 116 }) 117 if err != nil { 118 return nil, err 119 } 120 if th == nil { 121 glog.Warningf("no head found for treeID %v", treeID) 122 return nil, storage.ErrTreeNeedsInit 123 } 124 return th, nil 125 } 126 127 type newCacheFn func(*trillian.Tree) (cache.SubtreeCache, error) 128 129 func (t *treeStorage) getTreeAndConfig(ctx context.Context, tree *trillian.Tree) (*trillian.Tree, proto.Message, error) { 130 config, err := unmarshalSettings(tree) 131 if err != nil { 132 return nil, nil, err 133 } 134 return tree, config, nil 135 } 136 137 // begin returns a newly started tree transaction for the specified tree. 138 func (t *treeStorage) begin(ctx context.Context, tree *trillian.Tree, newCache newCacheFn, stx spanRead) (*treeTX, error) { 139 tree, config, err := t.getTreeAndConfig(ctx, tree) 140 if err != nil { 141 return nil, err 142 } 143 cache, err := newCache(tree) 144 if err != nil { 145 return nil, err 146 } 147 treeTX := &treeTX{ 148 treeID: tree.TreeId, 149 ts: t, 150 stx: stx, 151 cache: cache, 152 config: config, 153 } 154 155 return treeTX, nil 156 } 157 158 // getLatestRoot populates this TX with the newest tree root visible (when 159 // taking read-staleness into account) by this transaction. 160 func (t *treeTX) getLatestRoot(ctx context.Context) error { 161 t.getLatestRootOnce.Do(func() { 162 t._currentSTH, t._currentSTHErr = t.ts.latestSTH(ctx, t.stx, t.treeID) 163 if t._currentSTH != nil { 164 t._writeRev = t._currentSTH.TreeRevision + 1 165 } 166 }) 167 168 return t._currentSTHErr 169 } 170 171 // treeTX is a concrete implementation of the Trillian 172 // storage.TreeTX interface. 173 type treeTX struct { 174 treeID int64 175 176 ts *treeStorage 177 178 // mu guards the nil setting/checking of stx as part of the open checking. 179 mu sync.RWMutex 180 // stx is the underlying Spanner transaction in which all operations will be 181 // performed. 182 stx spanRead 183 184 // config holds the StorageSettings proto acquired from the trillian.Tree. 185 // Varies according to tree_type (LogStorageConfig vs MapStorageConfig). 186 config proto.Message 187 188 // currentSTH holds a copy of the latest known STH at the time the 189 // transaction was started, or nil if there was no STH. 190 _currentSTH *spannerpb.TreeHead 191 _currentSTHErr error 192 193 // writeRev is the tree revision at which any writes will be made. 194 _writeRev int64 195 196 cache cache.SubtreeCache 197 198 getLatestRootOnce sync.Once 199 } 200 201 func (t *treeTX) currentSTH(ctx context.Context) (*spannerpb.TreeHead, error) { 202 if err := t.getLatestRoot(ctx); err != nil { 203 return nil, err 204 } 205 return t._currentSTH, nil 206 } 207 208 func (t *treeTX) writeRev(ctx context.Context) (int64, error) { 209 if err := t.getLatestRoot(ctx); err != nil { 210 return -1, err 211 } 212 return t._writeRev, nil 213 } 214 215 // storeSubtrees adds buffered writes to the in-flight transaction to store the 216 // passed in subtrees. 217 func (t *treeTX) storeSubtrees(sts []*storagepb.SubtreeProto) error { 218 stx, ok := t.stx.(*spanner.ReadWriteTransaction) 219 if !ok { 220 return ErrWrongTXType 221 } 222 for _, st := range sts { 223 if st == nil { 224 continue 225 } 226 stBytes, err := proto.Marshal(st) 227 if err != nil { 228 return err 229 } 230 m := spanner.Insert( 231 subtreeTbl, 232 []string{colTreeID, colSubtreeID, colRevision, colSubtree}, 233 []interface{}{t.treeID, st.Prefix, t._writeRev, stBytes}, 234 ) 235 if err := stx.BufferWrite([]*spanner.Mutation{m}); err != nil { 236 return err 237 } 238 } 239 return nil 240 } 241 242 func (t *treeTX) flushSubtrees() error { 243 return t.cache.Flush(t.storeSubtrees) 244 } 245 246 // Commit attempts to apply all actions perfomed to the underlying Spanner 247 // transaction. If this call returns an error, any values READ via this 248 // transaction MUST NOT be used. 249 // On return from the call, this transaction will be in a closed state. 250 func (t *treeTX) Commit() error { 251 t.mu.Lock() 252 defer func() { 253 t.stx = nil 254 t.mu.Unlock() 255 }() 256 257 if t.stx == nil { 258 return ErrTransactionClosed 259 } 260 switch stx := t.stx.(type) { 261 case *spanner.ReadOnlyTransaction: 262 glog.V(1).Infof("Closed readonly tx %p", stx) 263 stx.Close() 264 return nil 265 case *spanner.ReadWriteTransaction: 266 return t.flushSubtrees() 267 default: 268 return fmt.Errorf("internal error: unknown transaction type %T", stx) 269 } 270 } 271 272 // Rollback aborts any operations perfomed on the underlying Spanner 273 // transaction. 274 // On return from the call, this transaction will be in a closed state. 275 func (t *treeTX) Rollback() error { 276 t.mu.Lock() 277 defer func() { 278 t.stx = nil 279 t.mu.Unlock() 280 }() 281 282 if t.stx == nil { 283 return ErrTransactionClosed 284 } 285 return nil 286 } 287 288 func (t *treeTX) Close() error { 289 if t.IsOpen() { 290 if err := t.Rollback(); err != nil && err != ErrTransactionClosed { 291 glog.Warningf("Rollback error on Close(): %v", err) 292 return err 293 } 294 } 295 return nil 296 } 297 298 // IsOpen returns true iff neither Commit nor Rollback have been called. 299 // If this function returns false, further operations may not be attempted on 300 // this transaction object. 301 func (t *treeTX) IsOpen() bool { 302 t.mu.RLock() 303 defer t.mu.RUnlock() 304 return t.stx != nil 305 } 306 307 // ReadRevision returns the tree revision at which the currently visible (taking 308 // into account read-staleness) STH was stored. 309 func (t *treeTX) ReadRevision() int64 { 310 sth, err := t.currentSTH(context.TODO()) 311 if err != nil { 312 panic(err) 313 } 314 return sth.TreeRevision 315 } 316 317 // WriteRevision returns the tree revision at which any tree-modifying 318 // operations will write. 319 func (t *treeTX) WriteRevision() int64 { 320 rev, err := t.writeRev(context.TODO()) 321 if err != nil { 322 panic(err) 323 } 324 return rev 325 } 326 327 // nodeIDToKey returns a []byte suitable for use as a primary key column for 328 // the subtree which contains the id. 329 // If id's prefix is not byte-aligned, an error will be returned. 330 func subtreeKey(id storage.NodeID) ([]byte, error) { 331 // TODO(al): extend this check to ensure id is at a tree stratum boundary. 332 if id.PrefixLenBits%8 != 0 { 333 return nil, fmt.Errorf("id.PrefixLenBits (%d) is not a multiple of 8; it cannot be a subtree prefix", id.PrefixLenBits) 334 } 335 return id.Path[:id.PrefixLenBits/8], nil 336 } 337 338 // getSubtree retrieves the most recent subtree specified by id at (or below) 339 // the requested revision. 340 // If no such subtree exists it returns nil. 341 func (t *treeTX) getSubtree(ctx context.Context, rev int64, id storage.NodeID) (p *storagepb.SubtreeProto, e error) { 342 stID, err := subtreeKey(id) 343 if err != nil { 344 return nil, err 345 } 346 347 var ret *storagepb.SubtreeProto 348 stmt := spanner.NewStatement( 349 "SELECT Revision, Subtree FROM SubtreeData d" + 350 " WHERE d.TreeID = @tree_id" + 351 " AND d.SubtreeID = @subtree_id" + 352 " AND d.Revision <= @revision" + 353 " ORDER BY d.Revision DESC" + 354 " LIMIT 1") 355 stmt.Params["tree_id"] = t.treeID 356 stmt.Params["subtree_id"] = stID 357 stmt.Params["revision"] = rev 358 359 rows := t.stx.Query(ctx, stmt) 360 err = rows.Do(func(r *spanner.Row) error { 361 if ret != nil { 362 return nil 363 } 364 365 var rRev int64 366 var st storagepb.SubtreeProto 367 stBytes := make([]byte, 1<<20) 368 if err = r.Columns(&rRev, &stBytes); err != nil { 369 return err 370 } 371 if err = proto.Unmarshal(stBytes, &st); err != nil { 372 return err 373 } 374 375 if rRev > rev { 376 return fmt.Errorf("got subtree with too new a revision %d, want %d", rRev, rev) 377 } 378 if got, want := stID, st.Prefix; !bytes.Equal(got, want) { 379 return fmt.Errorf("got subtree with prefix %v, wanted %v", got, want) 380 } 381 if got, want := rRev, rev; got > rev { 382 return fmt.Errorf("got subtree rev %d, wanted <= %d", got, want) 383 } 384 ret = &st 385 386 // If this is a subtree with a zero-length prefix, we'll need to create an 387 // empty Prefix field: 388 if st.Prefix == nil && len(stID) == 0 { 389 st.Prefix = []byte{} 390 } 391 return nil 392 }) 393 return ret, err 394 } 395 396 // GetMerkleNodes returns the requested set of nodes at, or before, the 397 // specified tree revision. 398 func (t *treeTX) GetMerkleNodes(ctx context.Context, rev int64, ids []storage.NodeID) ([]storage.Node, error) { 399 t.mu.RLock() 400 defer t.mu.RUnlock() 401 if t.stx == nil { 402 return nil, ErrTransactionClosed 403 } 404 405 return t.cache.GetNodes(ids, 406 func(ids []storage.NodeID) ([]*storagepb.SubtreeProto, error) { 407 // Request the various subtrees in parallel. 408 // c will carry any retrieved subtrees 409 c := make(chan *storagepb.SubtreeProto, len(ids)) 410 // err will carry any errors encountered while reading from spanner, 411 // although we'll only return to the caller the first one (if indeed 412 // there are any). 413 errc := make(chan error, len(ids)) 414 415 // Spawn goroutines for each request 416 for _, id := range ids { 417 id := id 418 go func() { 419 st, err := t.getSubtree(ctx, rev, id) 420 if err != nil { 421 errc <- err 422 return 423 } 424 c <- st 425 }() 426 } 427 428 // Now wait for the goroutines to signal their completion, and collect 429 // the results. 430 ret := make([]*storagepb.SubtreeProto, 0, len(ids)) 431 for range ids { 432 select { 433 case err := <-errc: 434 return nil, err 435 case st := <-c: 436 if st != nil { 437 ret = append(ret, st) 438 } 439 } 440 } 441 return ret, nil 442 }) 443 } 444 445 // SetMerkleNodes stores the provided merkle nodes at the writeRevision of the 446 // transaction. 447 func (t *treeTX) SetMerkleNodes(ctx context.Context, nodes []storage.Node) error { 448 t.mu.RLock() 449 defer t.mu.RUnlock() 450 if t.stx == nil { 451 return ErrTransactionClosed 452 } 453 454 writeRev, err := t.writeRev(ctx) 455 if err != nil { 456 return err 457 } 458 459 for _, n := range nodes { 460 err := t.cache.SetNodeHash( 461 n.NodeID, 462 n.Hash, 463 func(nID storage.NodeID) (*storagepb.SubtreeProto, error) { 464 return t.getSubtree(ctx, writeRev-1, nID) 465 }) 466 if err != nil { 467 return err 468 } 469 } 470 return nil 471 } 472 473 func checkDatabaseAccessible(ctx context.Context, client *spanner.Client) error { 474 stmt := spanner.NewStatement("SELECT 1") 475 // We don't care about freshness here, being able to read *something* is enough 476 rows := client.Single().Query(ctx, stmt) 477 defer rows.Stop() 478 return rows.Do(func(row *spanner.Row) error { return nil }) 479 } 480 481 // snapshotTX provides the standard methods for snapshot-based TXs. 482 type snapshotTX struct { 483 client *spanner.Client 484 485 // mu guards stx, which is set to nil when the TX is closed. 486 mu sync.RWMutex 487 stx spanRead 488 ls *logStorage 489 } 490 491 func (t *snapshotTX) Commit() error { 492 // No work required to commit snapshot transactions 493 return t.Close() 494 } 495 496 func (t *snapshotTX) Rollback() error { 497 return t.Close() 498 } 499 500 func (t *snapshotTX) Close() error { 501 t.mu.Lock() 502 defer t.mu.Unlock() 503 if t.stx == nil { 504 return ErrTransactionClosed 505 } 506 if stx, ok := t.stx.(*spanner.ReadOnlyTransaction); ok { 507 glog.V(1).Infof("Closed log snapshot %p", stx) 508 stx.Close() 509 } 510 t.stx = nil 511 512 return nil 513 }