github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/pipeline/transform/chain/no_skip_chain.go (about)

     1  package chain
     2  
     3  import (
     4  	"context"
     5  
     6  	"github.com/pachyderm/pachyderm/src/server/worker/common"
     7  	"github.com/pachyderm/pachyderm/src/server/worker/datum"
     8  )
     9  
    10  type noSkipJobDatumIterator struct {
    11  	dit       datum.Iterator
    12  	ditIndex  int
    13  	allDatums DatumSet
    14  	done      bool
    15  }
    16  
    17  type noSkipJobChain struct {
    18  	hasher DatumHasher
    19  }
    20  
    21  // NewNoSkipJobChain constructs a JobChain that will always yield all datums in each
    22  // job's datum iterator, and will therefore not block.
    23  func NewNoSkipJobChain(hasher DatumHasher) JobChain {
    24  	return &noSkipJobChain{hasher: hasher}
    25  }
    26  
    27  // Start adds a new job to the chain and returns the corresponding
    28  // JobDatumIterator
    29  func (jc *noSkipJobChain) Start(jd JobData) (JobDatumIterator, error) {
    30  	dit, err := jd.Iterator()
    31  	if err != nil {
    32  		return nil, err
    33  	}
    34  
    35  	allDatums := make(DatumSet)
    36  
    37  	dit.Reset()
    38  	for dit.Next() {
    39  		allDatums[jc.hasher.Hash(dit.Datum())]++
    40  	}
    41  	dit.Reset()
    42  
    43  	return &noSkipJobDatumIterator{
    44  		allDatums: allDatums,
    45  		dit:       dit,
    46  		ditIndex:  -1,
    47  		done:      false,
    48  	}, nil
    49  }
    50  
    51  // RecoveredDatums indicates the set of recovered datums for the job. This can
    52  // be called multiple times.
    53  func (jc *noSkipJobChain) RecoveredDatums(jd JobData, recoveredDatums DatumSet) error {
    54  	return nil
    55  }
    56  
    57  // Succeed indicates that the job has finished successfully
    58  func (jc *noSkipJobChain) Succeed(jd JobData) error {
    59  	return nil
    60  }
    61  
    62  // Fail indicates that the job has finished unsuccessfully
    63  func (jc *noSkipJobChain) Fail(jd JobData) error {
    64  	return nil
    65  }
    66  
    67  func (jdi *noSkipJobDatumIterator) NextBatch(ctx context.Context) (int64, error) {
    68  	if !jdi.done {
    69  		jdi.done = true
    70  		return jdi.MaxLen(), nil
    71  	}
    72  	return 0, nil
    73  }
    74  
    75  func (jdi *noSkipJobDatumIterator) NextDatum() ([]*common.Input, int64) {
    76  	jdi.ditIndex++
    77  	if jdi.ditIndex < jdi.dit.Len() {
    78  		return jdi.dit.DatumN(jdi.ditIndex), int64(jdi.ditIndex)
    79  	}
    80  	return nil, 0
    81  }
    82  
    83  func (jdi *noSkipJobDatumIterator) Reset() {
    84  	jdi.dit.Reset()
    85  	jdi.done = false
    86  }
    87  
    88  func (jdi *noSkipJobDatumIterator) MaxLen() int64 {
    89  	return int64(jdi.dit.Len())
    90  }
    91  
    92  func (jdi *noSkipJobDatumIterator) DatumSet() DatumSet {
    93  	return jdi.allDatums
    94  }
    95  
    96  func (jdi *noSkipJobDatumIterator) AdditiveOnly() bool {
    97  	return false
    98  }