github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/serverless/cmd/clone2serverless/internal/storage/fs/fs.go (about)

     1  // Copyright 2021 Google LLC. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package fs provides a simple filesystem log storage implementation.
    16  package fs
    17  
    18  import (
    19  	"context"
    20  	"crypto/sha256"
    21  	"errors"
    22  	"fmt"
    23  	"os"
    24  	"path/filepath"
    25  	"strconv"
    26  
    27  	"github.com/golang/glog"
    28  	"github.com/transparency-dev/serverless-log/api"
    29  	"github.com/transparency-dev/serverless-log/api/layout"
    30  	"github.com/transparency-dev/serverless-log/pkg/log"
    31  )
    32  
const (
	// dirPerm is used for all directories created under rootDir (rwxr-xr-x).
	dirPerm  = 0755
	// filePerm is used for all files written by this package (rw-r--r--).
	filePerm = 0644
	// TODO(al): consider making immutable files completely readonly
)
    38  
// Storage is a serverless storage implementation which uses files to store tree state.
// The on-disk structure is:
//
//	<rootDir>/leaves/aa/bb/cc/ddeeff...
//	<rootDir>/leaves/pending/aabbccddeeff...
//	<rootDir>/seq/aa/bb/cc/ddeeff...
//	<rootDir>/tile/<level>/aa/bb/ccddee...
//	<rootDir>/checkpoint
//
// The functions on this struct are not thread-safe.
type Storage struct {
	// rootDir is the root directory where tree data will be stored.
	rootDir string
	// nextSeq is a hint to the Sequence func as to what the next available
	// sequence number is to help performance.
	// Note that nextSeq may be <= than the actual next available number, but
	// never greater; Sequence advances it past entries found to be taken.
	nextSeq uint64
}
    58  
// leavesPendingPathFmt is the rootDir-relative path pattern for staging leaf
// data before it is hard-linked into place; the %0x verb is filled with the
// SHA-256 of the leaf contents (see Assign).
const leavesPendingPathFmt = "leaves/pending/%0x"
    60  
    61  // Load returns a Storage instance initialised from the filesystem at the provided location.
    62  // cpSize should be the Size of the checkpoint produced from the last `log.Integrate` call.
    63  func Load(rootDir string, cpSize uint64) (*Storage, error) {
    64  	fi, err := os.Stat(rootDir)
    65  	if err != nil {
    66  		return nil, fmt.Errorf("failed to stat %q: %w", rootDir, err)
    67  	}
    68  
    69  	if !fi.IsDir() {
    70  		return nil, fmt.Errorf("%q is not a directory", rootDir)
    71  	}
    72  
    73  	return &Storage{
    74  		rootDir: rootDir,
    75  		nextSeq: cpSize,
    76  	}, nil
    77  }
    78  
    79  // Create creates a new filesystem hierarchy and returns a Storage representation for it.
    80  func Create(rootDir string) (*Storage, error) {
    81  	_, err := os.Stat(rootDir)
    82  	if err == nil {
    83  		return nil, fmt.Errorf("%q %w", rootDir, os.ErrExist)
    84  	}
    85  
    86  	if err := os.MkdirAll(rootDir, dirPerm); err != nil {
    87  		return nil, fmt.Errorf("failed to create directory %q: %w", rootDir, err)
    88  	}
    89  
    90  	for _, sfx := range []string{"leaves/pending", "seq", "tile"} {
    91  		path := filepath.Join(rootDir, sfx)
    92  		if err := os.MkdirAll(path, dirPerm); err != nil {
    93  			return nil, fmt.Errorf("failed to create directory %q: %w", path, err)
    94  		}
    95  	}
    96  
    97  	fs := &Storage{
    98  		rootDir: rootDir,
    99  		nextSeq: 0,
   100  	}
   101  
   102  	return fs, nil
   103  }
   104  
   105  // Sequence assigns the given leaf entry to the next available sequence number.
   106  // This method will attempt to silently squash duplicate leaves, but it cannot
   107  // be guaranteed that no duplicate entries will exist.
   108  // Returns the sequence number assigned to this leaf (if the leaf has already
   109  // been sequenced it will return the original sequence number and ErrDupeLeaf).
   110  func (fs *Storage) Sequence(ctx context.Context, leafhash []byte, leaf []byte) (uint64, error) {
   111  	// 1. Check for dupe leafhash
   112  	// 2. Write temp file
   113  	// 3. Hard link temp -> seq file
   114  	// 4. Create leafhash file containing assigned sequence number
   115  
   116  	// Ensure the leafhash directory structure is present
   117  	leafDir, leafFile := layout.LeafPath(fs.rootDir, leafhash)
   118  	if err := os.MkdirAll(leafDir, dirPerm); err != nil {
   119  		return 0, fmt.Errorf("failed to make leaf directory structure: %w", err)
   120  	}
   121  	// Check for dupe leaf already present.
   122  	// If there is one, it should contain the existing leaf's sequence number,
   123  	// so read that back and return it.
   124  	leafFQ := filepath.Join(leafDir, leafFile)
   125  	if seqString, err := os.ReadFile(leafFQ); !os.IsNotExist(err) {
   126  		origSeq, err := strconv.ParseUint(string(seqString), 16, 64)
   127  		if err != nil {
   128  			return 0, err
   129  		}
   130  		return origSeq, log.ErrDupeLeaf
   131  	}
   132  
   133  	// Now try to sequence it, we may have to scan over some newly sequenced entries
   134  	// if Sequence has been called since the last time an Integrate/WriteCheckpoint
   135  	// was called.
   136  	for {
   137  		seq := fs.nextSeq
   138  		if err := fs.Assign(ctx, seq, leaf); err == log.ErrSeqAlreadyAssigned {
   139  			// That sequence number is in use, try the next one
   140  			fs.nextSeq++
   141  			continue
   142  		} else if err != nil {
   143  			return 0, fmt.Errorf("failed to link seq file: %w", err)
   144  		}
   145  
   146  		// Create a leafhash file containing the assigned sequence number.
   147  		// This isn't infallible though, if we crash after hardlinking the
   148  		// sequence file above, but before doing this a resubmission of the
   149  		// same leafhash would be permitted.
   150  		//
   151  		// First create a temp file
   152  		leafTmp := fmt.Sprintf("%s.tmp", leafFQ)
   153  		if err := createExclusive(leafTmp, []byte(strconv.FormatUint(seq, 16))); err != nil {
   154  			return 0, fmt.Errorf("couldn't create temporary leafhash file: %w", err)
   155  		}
   156  		defer func() {
   157  			if err := os.Remove(leafTmp); err != nil {
   158  				glog.Errorf("os.Remove(): %v", err)
   159  			}
   160  		}()
   161  		// Link the temporary file in place, if it already exists we likely crashed after
   162  		//creating the tmp file above.
   163  		if err := os.Link(leafTmp, leafFQ); err != nil && !errors.Is(err, os.ErrExist) {
   164  			return 0, fmt.Errorf("couldn't link temporary leafhash file in place: %w", err)
   165  		}
   166  
   167  		// All done!
   168  		return seq, nil
   169  	}
   170  }
   171  
   172  // Assign directly associates the given leaf data with the provided sequence number.
   173  // It is an error to attempt to assign data to a previously assigned sequence number,
   174  // even if the data is identical.
   175  func (fs *Storage) Assign(_ context.Context, seq uint64, leaf []byte) error {
   176  	// Ensure the sequencing directory structure is present:
   177  	seqDir, seqFile := layout.SeqPath(fs.rootDir, seq)
   178  	if err := os.MkdirAll(seqDir, dirPerm); err != nil {
   179  		return fmt.Errorf("failed to make seq directory structure: %w", err)
   180  	}
   181  
   182  	// Write a temp file with the leaf data
   183  	tmp := filepath.Join(fs.rootDir, fmt.Sprintf(leavesPendingPathFmt, sha256.Sum256(leaf)))
   184  	if err := createExclusive(tmp, leaf); err != nil {
   185  		return fmt.Errorf("unable to write temporary file: %w", err)
   186  	}
   187  	defer func() {
   188  		if err := os.Remove(tmp); err != nil {
   189  			glog.Errorf("os.Remove(): %v", err)
   190  		}
   191  	}()
   192  
   193  	// Hardlink the sequence file to the temporary file
   194  	seqPath := filepath.Join(seqDir, seqFile)
   195  	if err := os.Link(tmp, seqPath); errors.Is(err, os.ErrExist) {
   196  		return log.ErrSeqAlreadyAssigned
   197  	} else if err != nil {
   198  		return fmt.Errorf("failed to link seq file: %w", err)
   199  	}
   200  	return nil
   201  }
   202  
   203  // createExclusive creates the named file before writing the data in d to it.
   204  // It will error if the file already exists, or it's unable to fully write the
   205  // data & close the file.
   206  func createExclusive(f string, d []byte) error {
   207  	tmpFile, err := os.OpenFile(f, os.O_RDWR|os.O_CREATE|os.O_EXCL, filePerm)
   208  	if err != nil {
   209  		return fmt.Errorf("unable to create temporary file: %w", err)
   210  	}
   211  	n, err := tmpFile.Write(d)
   212  	if err != nil {
   213  		return fmt.Errorf("unable to write leafdata to temporary file: %w", err)
   214  	}
   215  	if got, want := n, len(d); got != want {
   216  		return fmt.Errorf("short write on leaf, wrote %d expected %d", got, want)
   217  	}
   218  	if err := tmpFile.Close(); err != nil {
   219  		return err
   220  	}
   221  	return nil
   222  }
   223  
   224  // ScanSequenced calls the provided function once for each contiguous entry
   225  // in storage starting at begin.
   226  // The scan will abort if the function returns an error, otherwise it will
   227  // return the number of sequenced entries.
   228  func (fs *Storage) ScanSequenced(_ context.Context, begin uint64, f func(seq uint64, entry []byte) error) (uint64, error) {
   229  	end := begin
   230  	for {
   231  		sp := filepath.Join(layout.SeqPath(fs.rootDir, end))
   232  		entry, err := os.ReadFile(sp)
   233  		if errors.Is(err, os.ErrNotExist) {
   234  			// we're done.
   235  			return end - begin, nil
   236  		} else if err != nil {
   237  			return end - begin, fmt.Errorf("failed to read leafdata at index %d: %w", begin, err)
   238  		}
   239  		if err := f(end, entry); err != nil {
   240  			return end - begin, err
   241  		}
   242  		end++
   243  	}
   244  }
   245  
   246  // GetTile returns the tile at the given tile-level and tile-index.
   247  // If no complete tile exists at that location, it will attempt to find a
   248  // partial tile for the given tree size at that location.
   249  func (fs *Storage) GetTile(_ context.Context, level, index, logSize uint64) (*api.Tile, error) {
   250  	tileSize := layout.PartialTileSize(level, index, logSize)
   251  	p := filepath.Join(layout.TilePath(fs.rootDir, level, index, tileSize))
   252  	t, err := os.ReadFile(p)
   253  	if err != nil {
   254  		if !errors.Is(err, os.ErrNotExist) {
   255  			return nil, fmt.Errorf("failed to read tile at %q: %w", p, err)
   256  		}
   257  		return nil, err
   258  	}
   259  
   260  	var tile api.Tile
   261  	if err := tile.UnmarshalText(t); err != nil {
   262  		return nil, fmt.Errorf("failed to parse tile: %w", err)
   263  	}
   264  	return &tile, nil
   265  }
   266  
// StoreTile writes a tile out to disk.
// Fully populated tiles are stored at the path corresponding to the level &
// index parameters, partially populated (i.e. right-hand edge) tiles are
// stored with a .xx suffix where xx is the number of "tile leaves" in hex.
func (fs *Storage) StoreTile(_ context.Context, level, index uint64, tile *api.Tile) error {
	tileSize := uint64(tile.NumLeaves)
	glog.V(2).Infof("StoreTile: level %d index %x ts: %x", level, index, tileSize)
	if tileSize == 0 || tileSize > 256 {
		return fmt.Errorf("tileSize %d must be > 0 and <= 256", tileSize)
	}
	t, err := tile.MarshalText()
	if err != nil {
		return fmt.Errorf("failed to marshal tile: %w", err)
	}

	// tileSize%256 maps a full tile (256) to width 0, which presumably selects
	// the unsuffixed full-tile path — confirm against layout.TilePath.
	tDir, tFile := layout.TilePath(fs.rootDir, level, index, tileSize%256)
	tPath := filepath.Join(tDir, tFile)

	if err := os.MkdirAll(tDir, dirPerm); err != nil {
		return fmt.Errorf("failed to create directory %q: %w", tDir, err)
	}

	// Write to a sibling temp file then rename into place, so readers never
	// observe a partially written tile.
	// TODO(al): use unlinked temp file
	temp := fmt.Sprintf("%s.temp", tPath)
	if err := os.WriteFile(temp, t, filePerm); err != nil {
		return fmt.Errorf("failed to write temporary tile file: %w", err)
	}
	if err := os.Rename(temp, tPath); err != nil {
		return fmt.Errorf("failed to rename temporary tile file: %w", err)
	}

	if tileSize == 256 {
		// The full tile now supersedes any partials at this location; the glob
		// matches all suffixed siblings (expected to be partial tiles).
		partials, err := filepath.Glob(fmt.Sprintf("%s.*", tPath))
		if err != nil {
			return fmt.Errorf("failed to list partial tiles for clean up; %w", err)
		}
		// Clean up old partial tiles by symlinking them to the new full tile.
		for _, p := range partials {
			glog.V(2).Infof("relink partial %s to %s", p, tPath)
			// We have to do a little dance here to get POSIX atomicity:
			// 1. Create a new temporary symlink to the full tile
			// 2. Rename the temporary symlink over the top of the old partial tile
			tmp := fmt.Sprintf("%s.link", tPath)
			if err := os.Symlink(tPath, tmp); err != nil {
				return fmt.Errorf("failed to create temp link to full tile: %w", err)
			}
			if err := os.Rename(tmp, p); err != nil {
				return fmt.Errorf("failed to rename temp link over partial tile: %w", err)
			}
		}
	}

	return nil
}
   321  
   322  // WriteCheckpoint stores a raw log checkpoint on disk.
   323  func (fs Storage) WriteCheckpoint(_ context.Context, newCPRaw []byte) error {
   324  	oPath := filepath.Join(fs.rootDir, layout.CheckpointPath)
   325  	tmp := fmt.Sprintf("%s.tmp", oPath)
   326  	if err := createExclusive(tmp, newCPRaw); err != nil {
   327  		return fmt.Errorf("failed to create temporary checkpoint file: %w", err)
   328  	}
   329  	return os.Rename(tmp, oPath)
   330  }
   331  
   332  // ReadCheckpoint reads and returns the contents of the log checkpoint file.
   333  func ReadCheckpoint(rootDir string) ([]byte, error) {
   334  	s := filepath.Join(rootDir, layout.CheckpointPath)
   335  	return os.ReadFile(s)
   336  }