kythe.io@v0.0.68-0.20240422202219-7225dbc01741/kythe/go/util/pager/pager.go (about)

     1  /*
     2   * Copyright 2015 The Kythe Authors. All rights reserved.
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *   http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  // Package pager implements a generic SetPager that splits a stream of Groups
    18  // into a single Set and one-or-more associated Pages.  Useful for constructing
    19  // paged serving data.
    20  package pager // import "kythe.io/kythe/go/util/pager"
    21  
    22  import (
    23  	"container/heap"
    24  	"context"
    25  	"errors"
    26  	"fmt"
    27  
    28  	"kythe.io/kythe/go/util/sortutil"
    29  )
    30  
    31  // A Head signals the start of a new Set.
    32  type Head any
    33  
    34  // A Group is part of a Set.
    35  type Group any
    36  
    37  // A Set is a set of Groups.
    38  type Set any
    39  
    40  // SetPager constructs a set of Sets and Pages from a sequence of Heads and
    41  // Groups.  For each set of Groups with the same Head, a call to StartSet must
    42  // precede.  All Groups for the same Head are then assumed to be given
    43  // sequentially to AddGroup.  Flush must be called after the final call to
    44  // AddGroup.
    45  type SetPager struct {
    46  	// MaxPageSize is the maximum size of a Set or Page, as calculated by the
    47  	// given Size function.
    48  	MaxPageSize int
    49  
    50  	// SkipEmpty determines whether empty Sets/Pages will be emitted.
    51  	SkipEmpty bool
    52  
    53  	// OutputSet should output the given Set and Groups not previously emitted by
    54  	// OutputPage.  The total size of all Groups is given.
    55  	OutputSet func(context.Context, int, Set, []Group) error
    56  	// OutputPage should output the given Group as an individual Page.  The Set
    57  	// currently being built is given for any necessary mutations.
    58  	OutputPage func(context.Context, Set, Group) error
    59  
    60  	// NewSet returns a new Set for the given Head.
    61  	NewSet func(Head) Set
    62  	// Combine possibly merges two Groups with the Head together.  If not
    63  	// possible, nil should be returned.
    64  	//
    65  	//   Constraints (if g != nil):
    66  	//     Combine(l, r) == Split(Size(l), g)
    67  	Combine func(l, r Group) (g Group)
    68  	// Split splits the given Group into a Group of the given size and a Group
    69  	// with any leftovers.
    70  	//
    71  	//   Constraints:
    72  	//     Size(l) == total
    73  	//     Size(r) == Size(g) - total
    74  	//     g == Combine(l, r)
    75  	Split func(total int, g Group) (l, r Group)
    76  	// Size returns the size of the given Group.
    77  	//
    78  	// Constraints:
    79  	//   Size(l) + Size(r) == Size(Combine(l, r))
    80  	Size func(Group) int
    81  
    82  	curSet          Set
    83  	curGrp          Group
    84  	groups          *sortutil.ByLesser // heap sorted by Size
    85  	resident, total int
    86  }
    87  
    88  // StartSet begins a new Set for the given Head, possibly emitting a previous
    89  // Set.  Each following call to AddGroup adds the group to this new Set until
    90  // another call to StartSet is made.
    91  func (p *SetPager) StartSet(ctx context.Context, hd Head) error {
    92  	if p.curSet != nil {
    93  		if err := p.Flush(ctx); err != nil {
    94  			return fmt.Errorf("error flushing previous set: %v", err)
    95  		}
    96  	}
    97  
    98  	p.curSet = p.NewSet(hd)
    99  	p.groups = &sortutil.ByLesser{
   100  		Lesser: sortutil.LesserFunc(func(a, b any) bool {
   101  			// Sort larger Groups first.
   102  			return p.Size(a) > p.Size(b)
   103  		}),
   104  	}
   105  
   106  	return nil
   107  }
   108  
   109  // AddGroup adds a Group to current Set being built, possibly emitting a new Set
   110  // and/or Page.  StartSet must be called before any calls to this method.  See
   111  // SetPager's documentation for the assumed order of the groups and this
   112  // method's relation to StartSet.
   113  func (p *SetPager) AddGroup(ctx context.Context, g Group) error {
   114  	if p.curSet == nil {
   115  		return errors.New("no Set currently being built")
   116  	}
   117  
   118  	// Setup p.curGrp; ensuring it is non-nil
   119  	sz := p.Size(g)
   120  	if p.SkipEmpty && sz == 0 {
   121  		return nil
   122  	} else if p.curGrp == nil {
   123  		p.curGrp = g
   124  	} else if c := p.Combine(p.curGrp, g); c != nil {
   125  		p.curGrp = c
   126  	} else {
   127  		// We can't combine the current group with g.  Push the current group onto
   128  		// the heap and make g the new current group.
   129  		heap.Push(p.groups, p.curGrp)
   130  		p.curGrp = g
   131  	}
   132  	// Update group size counters
   133  	p.resident += sz
   134  	p.total += sz
   135  
   136  	// Handle creation of pages when # of resident elements passes config value
   137  	for p.MaxPageSize > 0 && p.resident > p.MaxPageSize {
   138  		var eviction Group
   139  		// p.curGrp can be nil if we evicted it in a previous loop iteration
   140  		if p.curGrp != nil {
   141  			if p.Size(p.curGrp) > p.MaxPageSize {
   142  				// Split the large page; evict page exactly sized b.MaxPageSize
   143  				eviction, p.curGrp = p.Split(p.MaxPageSize, p.curGrp)
   144  			} else if p.groups.Len() == 0 || p.Size(p.curGrp) > p.Size(p.groups.Peek()) {
   145  				// Evict p.curGrp, it's larger than any other group we have
   146  				eviction, p.curGrp = p.curGrp, nil
   147  			}
   148  		}
   149  		if eviction == nil {
   150  			// Evict the largest group we have
   151  			eviction = heap.Pop(p.groups)
   152  		}
   153  
   154  		p.resident -= p.Size(eviction)
   155  		if err := p.OutputPage(ctx, p.curSet, eviction); err != nil {
   156  			return err
   157  		}
   158  	}
   159  
   160  	return nil
   161  }
   162  
   163  // Flush signals the end of the current Set being built, flushing it, and its
   164  // Groups to the output function.  This should be called after the final call to
   165  // AddGroup.  Manually calling Flush at any other time is unnecessary.
   166  func (p *SetPager) Flush(ctx context.Context) error {
   167  	if p == nil || p.curSet == nil {
   168  		return nil
   169  	} else if p.curGrp != nil {
   170  		p.groups.Push(p.curGrp) // the order of this last group doesn't matter
   171  	}
   172  
   173  	// grps := p.groups.Slice.([]Group)
   174  	grps := make([]Group, len(p.groups.Slice))
   175  	for i, g := range p.groups.Slice {
   176  		grps[i] = g
   177  	}
   178  
   179  	var err error
   180  	if !p.SkipEmpty || p.total > 0 {
   181  		err = p.OutputSet(ctx, p.total, p.curSet, grps)
   182  	}
   183  	p.curSet, p.curGrp, p.groups, p.resident, p.total = nil, nil, nil, 0, 0
   184  	return err
   185  }