github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/doltdb/commit_itr.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package doltdb
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"io"
    21  
    22  	"github.com/dolthub/dolt/go/store/hash"
    23  	"github.com/dolthub/dolt/go/store/types"
    24  )
    25  
    26  // CommitItr is an interface for iterating over a set of unique commits
    27  type CommitItr interface {
    28  	// Next returns the hash of the next commit, and a pointer to that commit.  Implementations of Next must handle
    29  	// making sure the list of commits returned are unique.  When complete Next will return hash.Hash{}, nil, io.EOF
    30  	Next(ctx context.Context) (hash.Hash, *Commit, error)
    31  
    32  	// Reset the commit iterator back to the start
    33  	Reset(ctx context.Context) error
    34  }
    35  
    36  type commitItr struct {
    37  	ddb         *DoltDB
    38  	rootCommits []*Commit
    39  	currentRoot int
    40  
    41  	added       map[hash.Hash]bool
    42  	unprocessed []hash.Hash
    43  	curr        *Commit
    44  }
    45  
    46  // CommitItrForAllBranches returns a CommitItr which will iterate over all commits in all branches in a DoltDB
    47  func CommitItrForAllBranches(ctx context.Context, ddb *DoltDB) (CommitItr, error) {
    48  	branchRefs, err := ddb.GetBranches(ctx)
    49  
    50  	if err != nil {
    51  		return nil, err
    52  	}
    53  
    54  	rootCommits := make([]*Commit, 0, len(branchRefs))
    55  	for _, ref := range branchRefs {
    56  		cm, err := ddb.ResolveCommitRef(ctx, ref)
    57  
    58  		if err != nil {
    59  			return nil, err
    60  		}
    61  
    62  		rootCommits = append(rootCommits, cm)
    63  	}
    64  
    65  	cmItr := CommitItrForRoots(ddb, rootCommits...)
    66  	return cmItr, nil
    67  }
    68  
    69  // CommitItrForRoots will return a CommitItr which will iterate over all descendant commits of the provided rootCommits.
    70  func CommitItrForRoots(ddb *DoltDB, rootCommits ...*Commit) CommitItr {
    71  	return &commitItr{
    72  		ddb:         ddb,
    73  		rootCommits: rootCommits,
    74  		added:       make(map[hash.Hash]bool, 4096),
    75  		unprocessed: make([]hash.Hash, 0, 4096),
    76  	}
    77  }
    78  
    79  func (cmItr *commitItr) Reset(ctx context.Context) error {
    80  	cmItr.curr = nil
    81  	cmItr.currentRoot = 0
    82  	cmItr.added = make(map[hash.Hash]bool, 4096)
    83  	cmItr.unprocessed = cmItr.unprocessed[:0]
    84  
    85  	return nil
    86  }
    87  
    88  // Next returns the hash of the next commit, and a pointer to that commit.  It handles making sure the list of commits
    89  // returned are unique.  When complete Next will return hash.Hash{}, nil, io.EOF
    90  func (cmItr *commitItr) Next(ctx context.Context) (hash.Hash, *Commit, error) {
    91  	for cmItr.curr == nil {
    92  		if cmItr.currentRoot >= len(cmItr.rootCommits) {
    93  			return hash.Hash{}, nil, io.EOF
    94  		}
    95  
    96  		cm := cmItr.rootCommits[cmItr.currentRoot]
    97  		h, err := cm.HashOf()
    98  
    99  		if err != nil {
   100  			return hash.Hash{}, nil, err
   101  		}
   102  
   103  		if !cmItr.added[h] {
   104  			cmItr.added[h] = true
   105  			cmItr.curr = cm
   106  			return h, cmItr.curr, nil
   107  		}
   108  
   109  		cmItr.currentRoot++
   110  	}
   111  
   112  	parents, err := cmItr.curr.ParentHashes(ctx)
   113  
   114  	if err != nil {
   115  		return hash.Hash{}, nil, err
   116  	}
   117  
   118  	for _, h := range parents {
   119  		if !cmItr.added[h] {
   120  			cmItr.added[h] = true
   121  			cmItr.unprocessed = append(cmItr.unprocessed, h)
   122  		}
   123  	}
   124  
   125  	numUnprocessed := len(cmItr.unprocessed)
   126  
   127  	if numUnprocessed == 0 {
   128  		cmItr.curr = nil
   129  		cmItr.currentRoot++
   130  		return cmItr.Next(ctx)
   131  	}
   132  
   133  	next := cmItr.unprocessed[numUnprocessed-1]
   134  	cmItr.unprocessed = cmItr.unprocessed[:numUnprocessed-1]
   135  	cmItr.curr, err = hashToCommit(ctx, cmItr.ddb.ValueReadWriter(), next)
   136  
   137  	if err != nil {
   138  		return hash.Hash{}, nil, err
   139  	}
   140  
   141  	return next, cmItr.curr, nil
   142  }
   143  
   144  func hashToCommit(ctx context.Context, vrw types.ValueReadWriter, h hash.Hash) (*Commit, error) {
   145  	val, err := vrw.ReadValue(ctx, h)
   146  
   147  	if err != nil {
   148  		return nil, err
   149  	}
   150  
   151  	if val == nil {
   152  		return nil, errors.New("failed to get commit")
   153  	}
   154  
   155  	cmSt := val.(types.Struct)
   156  	return NewCommit(vrw, cmSt), nil
   157  }
   158  
   159  // CommitFilter is a function that returns true if a commit should be filtered out, and false if it should be kept
   160  type CommitFilter func(context.Context, hash.Hash, *Commit) (filterOut bool, err error)
   161  
   162  // FilteringCommitItr is a CommitItr implementation that applies a filtering function to limit the commits returned
   163  type FilteringCommitItr struct {
   164  	itr    CommitItr
   165  	filter CommitFilter
   166  }
   167  
   168  func NewFilteringCommitItr(itr CommitItr, filter CommitFilter) FilteringCommitItr {
   169  	return FilteringCommitItr{itr, filter}
   170  }
   171  
   172  // Next returns the hash of the next commit, and a pointer to that commit.  Implementations of Next must handle
   173  // making sure the list of commits returned are unique.  When complete Next will return hash.Hash{}, nil, io.EOF
   174  func (itr FilteringCommitItr) Next(ctx context.Context) (hash.Hash, *Commit, error) {
   175  	// iteration will terminate on io.EOF or a commit that is !filteredOut
   176  	for {
   177  		h, cm, err := itr.itr.Next(ctx)
   178  
   179  		if err != nil {
   180  			return hash.Hash{}, nil, err
   181  		}
   182  
   183  		if filterOut, err := itr.filter(ctx, h, cm); err != nil {
   184  			return hash.Hash{}, nil, err
   185  		} else if !filterOut {
   186  			return h, cm, nil
   187  		}
   188  	}
   189  }
   190  
   191  // Reset the commit iterator back to the
   192  func (itr FilteringCommitItr) Reset(ctx context.Context) error {
   193  	return itr.itr.Reset(ctx)
   194  }