github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/cmd/slicer/reduce.go (about)

     1  // Copyright 2019 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	"context"
     9  	"errors"
    10  	"flag"
    11  	"fmt"
    12  	"os"
    13  	"strconv"
    14  
    15  	"github.com/grailbio/base/log"
    16  	"github.com/grailbio/bigslice"
    17  	"github.com/grailbio/bigslice/exec"
    18  )
    19  
    20  var reduceTest = bigslice.Func(func(nshard, nkey int) (slice bigslice.Slice) {
    21  	log.Printf("reduceTest(%d, %d)", nshard, nkey)
    22  	slice = randomReader(nshard, nkey)
    23  	slice = bigslice.Map(slice, func(key string, xs []int) (string, int) { return key, len(xs) })
    24  	slice = bigslice.Reduce(slice, func(a int, e int) int {
    25  		return a + e
    26  	})
    27  	return
    28  })
    29  
    30  func reduce(sess *exec.Session, args []string) error {
    31  	var (
    32  		flags  = flag.NewFlagSet("cogroup", flag.ExitOnError)
    33  		nshard = flags.Int("nshard", 64, "number of shards")
    34  		nkey   = flags.Int("nkey", 1e6, "number of keys per shard")
    35  	)
    36  	flags.Usage = func() {
    37  		fmt.Fprintln(os.Stderr, `usage: slicer reduce [-nshard N] [-nkey N]`)
    38  		flags.PrintDefaults()
    39  		os.Exit(2)
    40  	}
    41  	if err := flags.Parse(args); err != nil {
    42  		log.Fatal(err)
    43  	}
    44  
    45  	ctx := context.Background()
    46  	r, err := sess.Run(ctx, reduceTest, *nshard, *nkey)
    47  	if err != nil {
    48  		return err
    49  	}
    50  	scan := r.Scanner()
    51  	defer scan.Close()
    52  	ok := true
    53  	errorf := func(format string, v ...interface{}) {
    54  		log.Error.Printf(format, v...)
    55  		ok = false
    56  	}
    57  	var (
    58  		keystr string
    59  		count  int
    60  		seen   = make([]bool, *nkey)
    61  	)
    62  	for scan.Scan(ctx, &keystr, &count) {
    63  		key, err := strconv.Atoi(keystr)
    64  		if err != nil {
    65  			panic(err)
    66  		}
    67  		if seen[key] {
    68  			errorf("saw key %v multiple times", key)
    69  		}
    70  		seen[key] = true
    71  		if count != *nshard {
    72  			errorf("wrong value for key %s: %v", key, count)
    73  		}
    74  	}
    75  	if err := scan.Err(); err != nil {
    76  		return err
    77  	}
    78  	for key, saw := range seen {
    79  		if !saw {
    80  			errorf("did not see key %v", key)
    81  		}
    82  	}
    83  	if !ok {
    84  		return errors.New("test errors")
    85  	}
    86  	fmt.Println("ok")
    87  	return nil
    88  }