go-hep.org/x/hep@v0.38.1/groot/rcmd/diff.go (about)

     1  // Copyright ©2020 The go-hep Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package rcmd
     6  
     7  import (
     8  	"fmt"
     9  	"io"
    10  	"log"
    11  	"os"
    12  	stdpath "path"
    13  	"reflect"
    14  	"sort"
    15  	"strings"
    16  
    17  	"github.com/google/go-cmp/cmp"
    18  	"go-hep.org/x/hep/groot/riofs"
    19  	"go-hep.org/x/hep/groot/root"
    20  	"go-hep.org/x/hep/groot/rtree"
    21  )
    22  
    23  // Diff compares the values of the list of keys between the two provided ROOT files.
    24  // Diff writes the differing data (if any) to w.
    25  //
    26  // if w is nil, os.Stdout is used.
    27  // if the slice of keys is nil, all keys are considered.
    28  func Diff(w io.Writer, ref, chk *riofs.File, keys []string) error {
    29  	cmd, err := newDiffCmd(w, ref, chk, keys)
    30  	if err != nil {
    31  		return fmt.Errorf("could not compute keys to compare: %w", err)
    32  	}
    33  
    34  	return cmd.diffFiles()
    35  }
    36  
    37  type diffCmd struct {
    38  	w    io.Writer
    39  	fref *riofs.File
    40  	fchk *riofs.File
    41  	keys []string
    42  }
    43  
    44  func newDiffCmd(w io.Writer, fref, fchk *riofs.File, keys []string) (*diffCmd, error) {
    45  	var (
    46  		err   error
    47  		ukeys []string
    48  		cmd   = &diffCmd{fref: fref, fchk: fchk, w: w}
    49  	)
    50  
    51  	if w == nil {
    52  		cmd.w = os.Stdout
    53  	}
    54  
    55  	if len(keys) != 0 {
    56  		for _, k := range keys {
    57  			k = strings.TrimSpace(k)
    58  			if k == "" {
    59  				continue
    60  			}
    61  			ukeys = append(ukeys, k)
    62  		}
    63  
    64  		if len(ukeys) == 0 {
    65  			return nil, fmt.Errorf("empty key set")
    66  		}
    67  	} else {
    68  		for _, k := range cmd.fref.Keys() {
    69  			ukeys = append(ukeys, k.Name())
    70  		}
    71  	}
    72  
    73  	allgood := true
    74  	for _, k := range ukeys {
    75  		_, err = cmd.fref.Get(k)
    76  		if err != nil {
    77  			allgood = false
    78  			fmt.Fprintf(cmd.w, "key[%s] -- missing from ref-file\n", k)
    79  			log.Printf("key %q is missing from ref-file=%q", k, cmd.fref.Name())
    80  		}
    81  
    82  		_, err = cmd.fchk.Get(k)
    83  		if err != nil {
    84  			allgood = false
    85  			fmt.Fprintf(cmd.w, "key[%s] -- missing from chk-file\n", k)
    86  			log.Printf("key %q is missing from chk-file=%q", k, cmd.fchk.Name())
    87  		}
    88  
    89  		cmd.keys = append(cmd.keys, k)
    90  	}
    91  
    92  	if len(cmd.keys) == 0 {
    93  		return nil, fmt.Errorf("empty key set")
    94  	}
    95  
    96  	if !allgood {
    97  		return nil, fmt.Errorf("key set differ")
    98  	}
    99  
   100  	sort.Strings(cmd.keys)
   101  	return cmd, nil
   102  }
   103  
   104  func (cmd *diffCmd) diffFiles() error {
   105  	for _, key := range cmd.keys {
   106  		ref, err := cmd.fref.Get(key)
   107  		if err != nil {
   108  			return err
   109  		}
   110  
   111  		chk, err := cmd.fchk.Get(key)
   112  		if err != nil {
   113  			return err
   114  		}
   115  
   116  		err = cmd.diffObject(key, ref, chk)
   117  		if err != nil {
   118  			return err
   119  		}
   120  	}
   121  
   122  	return nil
   123  }
   124  
   125  func (cmd *diffCmd) diffObject(key string, ref, chk root.Object) error {
   126  	refType := reflect.TypeOf(ref)
   127  	chkType := reflect.TypeOf(chk)
   128  
   129  	if !reflect.DeepEqual(refType, chkType) {
   130  		return fmt.Errorf("%s: type of keys differ: ref=%v chk=%v", key, refType, chkType)
   131  	}
   132  
   133  	switch ref := ref.(type) {
   134  	case rtree.Tree:
   135  		return cmd.diffTree(key, ref, chk.(rtree.Tree))
   136  	case riofs.Directory:
   137  		return cmd.diffDir(key, ref, chk.(riofs.Directory))
   138  
   139  	case root.Object:
   140  		ok := reflect.DeepEqual(ref, chk)
   141  		if !ok {
   142  			fmt.Fprintf(cmd.w, "key[%s] (%T) -- (-ref +chk)\n-%v\n+%v\n", key, ref, ref, chk)
   143  			return fmt.Errorf("%s: keys differ", key)
   144  		}
   145  		return nil
   146  	default:
   147  		return fmt.Errorf("unhandled type %T (key=%v)", ref, key)
   148  	}
   149  }
   150  
   151  func (cmd *diffCmd) diffDir(key string, ref, chk riofs.Directory) error {
   152  	kref := ref.Keys()
   153  	kchk := chk.Keys()
   154  	if len(kref) != len(kchk) {
   155  		return fmt.Errorf("%s: number of keys in directory differ: ref=%d, chk=%d", key, len(kref), len(kchk))
   156  	}
   157  
   158  	krefset := make(map[string]struct{})
   159  	kchkset := make(map[string]struct{})
   160  	for _, k := range kref {
   161  		krefset[k.Name()] = struct{}{}
   162  	}
   163  	for _, k := range kchk {
   164  		kchkset[k.Name()] = struct{}{}
   165  	}
   166  	refnames := make([]string, 0, len(krefset))
   167  	for k := range krefset {
   168  		refnames = append(refnames, k)
   169  	}
   170  	chknames := make([]string, 0, len(kchkset))
   171  	for k := range kchkset {
   172  		chknames = append(chknames, k)
   173  	}
   174  	sort.Strings(refnames)
   175  	sort.Strings(chknames)
   176  	if len(krefset) != len(kchkset) {
   177  		return fmt.Errorf("%s: keys in directory differ: ref=%s, chk=%s", key, refnames, chknames)
   178  	}
   179  
   180  	for _, k := range refnames {
   181  		oref, err := ref.Get(k)
   182  		if err != nil {
   183  			return fmt.Errorf("%s: could not retrieve %s from ref-directory", key, k)
   184  		}
   185  		ochk, err := chk.Get(k)
   186  		if err != nil {
   187  			return fmt.Errorf("%s: could not retrieve %s from chk-directory", key, k)
   188  		}
   189  
   190  		err = cmd.diffObject(stdpath.Join(key, k), oref, ochk)
   191  		if err != nil {
   192  			return fmt.Errorf("%s: values for %s in directory differ: %w", key, k, err)
   193  		}
   194  	}
   195  
   196  	return nil
   197  }
   198  
   199  func (cmd *diffCmd) diffTree(key string, ref, chk rtree.Tree) error {
   200  	if eref, echk := ref.Entries(), chk.Entries(); eref != echk {
   201  		return fmt.Errorf("%s: number of entries differ: ref=%v chk=%v", key, eref, echk)
   202  	}
   203  
   204  	refVars := rtree.NewReadVars(ref)
   205  	chkVars := rtree.NewReadVars(chk)
   206  
   207  	quit := make(chan struct{})
   208  	defer close(quit)
   209  
   210  	refc := make(chan treeEntry)
   211  	chkc := make(chan treeEntry)
   212  
   213  	go cmd.treeDump(quit, refc, ref, refVars)
   214  	go cmd.treeDump(quit, chkc, chk, chkVars)
   215  
   216  	allgood := true
   217  	n := chk.Entries()
   218  	for i := int64(0); i < n; i++ {
   219  		ref := <-refc
   220  		chk := <-chkc
   221  		if ref.err != nil {
   222  			return fmt.Errorf("%s: error reading ref-tree: %w", key, ref.err)
   223  		}
   224  		if chk.err != nil {
   225  			return fmt.Errorf("%s: error reading chk-tree: %w", key, chk.err)
   226  		}
   227  		if chk.n != ref.n {
   228  			return fmt.Errorf("%s: tree out of sync (ref=%d, chk=%d)", key, ref.n, chk.n)
   229  		}
   230  
   231  		for ii := range refVars {
   232  			var (
   233  				ref  = reflect.Indirect(reflect.ValueOf(refVars[ii].Value)).Interface()
   234  				chk  = reflect.Indirect(reflect.ValueOf(chkVars[ii].Value)).Interface()
   235  				diff = cmp.Diff(ref, chk)
   236  			)
   237  			if diff != "" {
   238  				fmt.Fprintf(cmd.w, "key[%s][%04d].%s -- (-ref +chk)\n%s", key, i, refVars[ii].Name, diff)
   239  				allgood = false
   240  			}
   241  		}
   242  		ref.ok <- 1
   243  		chk.ok <- 1
   244  	}
   245  
   246  	if !allgood {
   247  		return fmt.Errorf("%s: trees differ", key)
   248  	}
   249  
   250  	return nil
   251  }
   252  
   253  type treeEntry struct {
   254  	n   int64
   255  	err error
   256  	ok  chan int
   257  }
   258  
   259  func (cmd *diffCmd) treeDump(quit chan struct{}, out chan treeEntry, t rtree.Tree, vars []rtree.ReadVar) {
   260  	r, err := rtree.NewReader(t, vars)
   261  	if err != nil {
   262  		out <- treeEntry{err: err}
   263  		return
   264  	}
   265  	defer r.Close()
   266  
   267  	defer close(out)
   268  
   269  	next := make(chan int)
   270  	err = r.Read(func(ctx rtree.RCtx) error {
   271  		select {
   272  		case <-quit:
   273  			return io.EOF
   274  		case out <- treeEntry{err: nil, n: ctx.Entry, ok: next}:
   275  			<-next
   276  			return nil
   277  		}
   278  	})
   279  	if err != nil {
   280  		out <- treeEntry{err: err}
   281  	}
   282  }