github.com/tobgu/qframe@v0.4.0/grouper.go (about)

     1  package qframe
     2  
     3  import (
     4  	"github.com/tobgu/qframe/internal/grouper"
     5  	"github.com/tobgu/qframe/internal/icolumn"
     6  	"github.com/tobgu/qframe/internal/index"
     7  	"github.com/tobgu/qframe/qerrors"
     8  	"github.com/tobgu/qframe/types"
     9  )
    10  
    11  // GroupStats exposes internal statistics collected while grouping rows.
    12  // It is strictly informational: clients must not base any kind of decision on it,
    13  // since its layout may change whenever the underlying grouping mechanism changes.
    14  type GroupStats grouper.GroupStats
    15  
    16  // Grouper holds the groups of rows produced by the QFrame.GroupBy function.
    17  type Grouper struct {
    18  	indices        []index.Int // one row index per group; Aggregate and QFrames iterate over these
    19  	groupedColumns []string // names of the group-by columns; Aggregate emits them first in its result
    20  	columns        []namedColumn // column slice shared by every frame returned from QFrames
    21  	columnsByName  map[string]namedColumn // lookup by column name, used by Aggregate and QFrames
    22  	Err            error // when non-nil, Aggregate and QFrames propagate it instead of producing a result
    23  	Stats          GroupStats // informational grouping statistics, see GroupStats
    24  }
    25  
    26  // Aggregation describes a single function to apply to a single column.
    27  type Aggregation struct {
    28  	// Fn is the aggregation function to apply. The string "count" is special-cased by
    29  	// Grouper.Aggregate and produces the number of rows in each group.
    30  	//
    31  	// IMPORTANT: For pointer and reference types the data passed as argument to this function must
    32  	// not be assumed valid after the function returns. Take a copy if you need to keep it around.
    33  	Fn types.SliceFuncOrBuiltInId
    34  
    35  	// Column is the name of the column to apply the aggregation to.
    36  	Column string
    37  
    38  	// As can be used to specify the destination column name. When it is empty the destination
    39  	// column name defaults to the value of Column.
    40  	As string
    41  }
    42  
    43  // Aggregate applies the given aggregations to all row groups in the Grouper.
    44  //
    45  // Time complexity O(m*n) where m = number of aggregations, n = number of rows.
    46  func (g Grouper) Aggregate(aggs ...Aggregation) QFrame {
    47  	if g.Err != nil {
    48  		return QFrame{Err: g.Err}
    49  	}
    50  
    51  	// Loop over all groups and pick the first row in each of the groups.
    52  	// This index will be used to populate the grouped by columns below.
    53  	firstElementIx := make(index.Int, len(g.indices))
    54  	for i, ix := range g.indices {
    55  		firstElementIx[i] = ix[0]
    56  	}
    57  
    58  	newColumnsByName := make(map[string]namedColumn, len(g.groupedColumns)+len(aggs))
    59  	newColumns := make([]namedColumn, 0, len(g.groupedColumns)+len(aggs))
    60  	for i, colName := range g.groupedColumns {
    61  		col := g.columnsByName[colName]
    62  		col.pos = i
    63  		col.Column = col.Subset(firstElementIx)
    64  		newColumnsByName[colName] = col
    65  		newColumns = append(newColumns, col)
    66  	}
    67  
    68  	var err error
    69  	for _, agg := range aggs {
    70  		col, ok := g.columnsByName[agg.Column]
    71  		if !ok {
    72  			return QFrame{Err: qerrors.New("Aggregate", unknownCol(agg.Column))}
    73  		}
    74  
    75  		newColumnName := agg.Column
    76  		if agg.As != "" {
    77  			newColumnName = agg.As
    78  		}
    79  		col.name = newColumnName
    80  
    81  		_, ok = newColumnsByName[newColumnName]
    82  		if ok {
    83  			return QFrame{Err: qerrors.New(
    84  				"Aggregate",
    85  				"cannot aggregate on column that is part of group by or is already an aggregate: %s", newColumnName)}
    86  		}
    87  
    88  		if agg.Fn == "count" {
    89  			// Special convenience case for "count" which would normally require a cast from
    90  			// any other type of column to int before being executed.
    91  			counts := make([]int, len(g.indices))
    92  			for i, ix := range g.indices {
    93  				counts[i] = len(ix)
    94  			}
    95  
    96  			col.Column = icolumn.New(counts)
    97  		} else {
    98  			col.Column, err = col.Aggregate(g.indices, agg.Fn)
    99  			if err != nil {
   100  				return QFrame{Err: qerrors.Propagate("Aggregate", err)}
   101  			}
   102  		}
   103  
   104  		newColumnsByName[newColumnName] = col
   105  		newColumns = append(newColumns, col)
   106  	}
   107  
   108  	return QFrame{columns: newColumns, columnsByName: newColumnsByName, index: index.NewAscending(uint32(len(g.indices)))}
   109  }
   110  
   111  // QFrames returns a slice of QFrame where each frame represents the content of one group.
   112  //
   113  // Time complexity O(n) where n = number of groups.
   114  func (g Grouper) QFrames() ([]QFrame, error) {
   115  	if g.Err != nil {
   116  		return nil, g.Err
   117  	}
   118  
   119  	baseFrame := QFrame{columns: g.columns, columnsByName: g.columnsByName, index: index.Int{}}
   120  	result := make([]QFrame, len(g.indices))
   121  	for i, ix := range g.indices {
   122  		result[i] = baseFrame.withIndex(ix)
   123  	}
   124  	return result, nil
   125  }