go-hep.org/x/hep@v0.38.1/groot/rcmd/diff.go (about) 1 // Copyright ©2020 The go-hep Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package rcmd 6 7 import ( 8 "fmt" 9 "io" 10 "log" 11 "os" 12 stdpath "path" 13 "reflect" 14 "sort" 15 "strings" 16 17 "github.com/google/go-cmp/cmp" 18 "go-hep.org/x/hep/groot/riofs" 19 "go-hep.org/x/hep/groot/root" 20 "go-hep.org/x/hep/groot/rtree" 21 ) 22 23 // Diff compares the values of the list of keys between the two provided ROOT files. 24 // Diff writes the differing data (if any) to w. 25 // 26 // if w is nil, os.Stdout is used. 27 // if the slice of keys is nil, all keys are considered. 28 func Diff(w io.Writer, ref, chk *riofs.File, keys []string) error { 29 cmd, err := newDiffCmd(w, ref, chk, keys) 30 if err != nil { 31 return fmt.Errorf("could not compute keys to compare: %w", err) 32 } 33 34 return cmd.diffFiles() 35 } 36 37 type diffCmd struct { 38 w io.Writer 39 fref *riofs.File 40 fchk *riofs.File 41 keys []string 42 } 43 44 func newDiffCmd(w io.Writer, fref, fchk *riofs.File, keys []string) (*diffCmd, error) { 45 var ( 46 err error 47 ukeys []string 48 cmd = &diffCmd{fref: fref, fchk: fchk, w: w} 49 ) 50 51 if w == nil { 52 cmd.w = os.Stdout 53 } 54 55 if len(keys) != 0 { 56 for _, k := range keys { 57 k = strings.TrimSpace(k) 58 if k == "" { 59 continue 60 } 61 ukeys = append(ukeys, k) 62 } 63 64 if len(ukeys) == 0 { 65 return nil, fmt.Errorf("empty key set") 66 } 67 } else { 68 for _, k := range cmd.fref.Keys() { 69 ukeys = append(ukeys, k.Name()) 70 } 71 } 72 73 allgood := true 74 for _, k := range ukeys { 75 _, err = cmd.fref.Get(k) 76 if err != nil { 77 allgood = false 78 fmt.Fprintf(cmd.w, "key[%s] -- missing from ref-file\n", k) 79 log.Printf("key %q is missing from ref-file=%q", k, cmd.fref.Name()) 80 } 81 82 _, err = cmd.fchk.Get(k) 83 if err != nil { 84 allgood = false 85 fmt.Fprintf(cmd.w, "key[%s] -- missing from chk-file\n", k) 86 log.Printf("key %q is missing from chk-file=%q", k, cmd.fchk.Name()) 87 } 88 89 cmd.keys = append(cmd.keys, k) 90 } 91 92 if len(cmd.keys) == 0 { 93 return nil, fmt.Errorf("empty key set") 94 } 95 96 if !allgood { 97 return nil, fmt.Errorf("key set differ") 98 } 99 100 sort.Strings(cmd.keys) 101 return cmd, nil 102 } 103 104 func (cmd *diffCmd) diffFiles() error { 105 for _, key := range cmd.keys { 106 ref, err := cmd.fref.Get(key) 107 if err != nil { 108 return err 109 } 110 111 chk, err := cmd.fchk.Get(key) 112 if err != nil { 113 return err 114 } 115 116 err = cmd.diffObject(key, ref, chk) 117 if err != nil { 118 return err 119 } 120 } 121 122 return nil 123 } 124 125 func (cmd *diffCmd) diffObject(key string, ref, chk root.Object) error { 126 refType := reflect.TypeOf(ref) 127 chkType := reflect.TypeOf(chk) 128 129 if !reflect.DeepEqual(refType, chkType) { 130 return fmt.Errorf("%s: type of keys differ: ref=%v chk=%v", key, refType, chkType) 131 } 132 133 switch ref := ref.(type) { 134 case rtree.Tree: 135 return cmd.diffTree(key, ref, chk.(rtree.Tree)) 136 case riofs.Directory: 137 return cmd.diffDir(key, ref, chk.(riofs.Directory)) 138 139 case root.Object: 140 ok := reflect.DeepEqual(ref, chk) 141 if !ok { 142 fmt.Fprintf(cmd.w, "key[%s] (%T) -- (-ref +chk)\n-%v\n+%v\n", key, ref, ref, chk) 143 return fmt.Errorf("%s: keys differ", key) 144 } 145 return nil 146 default: 147 return fmt.Errorf("unhandled type %T (key=%v)", ref, key) 148 } 149 } 150 151 func (cmd *diffCmd) diffDir(key string, ref, chk riofs.Directory) error { 152 kref := ref.Keys() 153 kchk := chk.Keys() 154 if len(kref) != len(kchk) { 155 return fmt.Errorf("%s: number of keys in directory differ: ref=%d, chk=%d", key, len(kref), len(kchk)) 156 } 157 158 krefset := make(map[string]struct{}) 159 kchkset := make(map[string]struct{}) 160 for _, k := range kref { 161 krefset[k.Name()] = struct{}{} 162 } 163 for _, k := range kchk { 164 kchkset[k.Name()] = struct{}{} 165 } 166 refnames := make([]string, 0, len(krefset)) 167 for k := range krefset { 168 refnames = append(refnames, k) 169 } 170 chknames := make([]string, 0, len(kchkset)) 171 for k := range kchkset { 172 chknames = append(chknames, k) 173 } 174 sort.Strings(refnames) 175 sort.Strings(chknames) 176 if len(krefset) != len(kchkset) { 177 return fmt.Errorf("%s: keys in directory differ: ref=%s, chk=%s", key, refnames, chknames) 178 } 179 180 for _, k := range refnames { 181 oref, err := ref.Get(k) 182 if err != nil { 183 return fmt.Errorf("%s: could not retrieve %s from ref-directory", key, k) 184 } 185 ochk, err := chk.Get(k) 186 if err != nil { 187 return fmt.Errorf("%s: could not retrieve %s from chk-directory", key, k) 188 } 189 190 err = cmd.diffObject(stdpath.Join(key, k), oref, ochk) 191 if err != nil { 192 return fmt.Errorf("%s: values for %s in directory differ: %w", key, k, err) 193 } 194 } 195 196 return nil 197 } 198 199 func (cmd *diffCmd) diffTree(key string, ref, chk rtree.Tree) error { 200 if eref, echk := ref.Entries(), chk.Entries(); eref != echk { 201 return fmt.Errorf("%s: number of entries differ: ref=%v chk=%v", key, eref, echk) 202 } 203 204 refVars := rtree.NewReadVars(ref) 205 chkVars := rtree.NewReadVars(chk) 206 207 quit := make(chan struct{}) 208 defer close(quit) 209 210 refc := make(chan treeEntry) 211 chkc := make(chan treeEntry) 212 213 go cmd.treeDump(quit, refc, ref, refVars) 214 go cmd.treeDump(quit, chkc, chk, chkVars) 215 216 allgood := true 217 n := chk.Entries() 218 for i := int64(0); i < n; i++ { 219 ref := <-refc 220 chk := <-chkc 221 if ref.err != nil { 222 return fmt.Errorf("%s: error reading ref-tree: %w", key, ref.err) 223 } 224 if chk.err != nil { 225 return fmt.Errorf("%s: error reading chk-tree: %w", key, chk.err) 226 } 227 if chk.n != ref.n { 228 return fmt.Errorf("%s: tree out of sync (ref=%d, chk=%d)", key, ref.n, chk.n) 229 } 230 231 for ii := range refVars { 232 var ( 233 ref = reflect.Indirect(reflect.ValueOf(refVars[ii].Value)).Interface() 234 chk = reflect.Indirect(reflect.ValueOf(chkVars[ii].Value)).Interface() 235 diff = cmp.Diff(ref, chk) 236 ) 237 if diff != "" { 238 fmt.Fprintf(cmd.w, "key[%s][%04d].%s -- (-ref +chk)\n%s", key, i, refVars[ii].Name, diff) 239 allgood = false 240 } 241 } 242 ref.ok <- 1 243 chk.ok <- 1 244 } 245 246 if !allgood { 247 return fmt.Errorf("%s: trees differ", key) 248 } 249 250 return nil 251 } 252 253 type treeEntry struct { 254 n int64 255 err error 256 ok chan int 257 } 258 259 func (cmd *diffCmd) treeDump(quit chan struct{}, out chan treeEntry, t rtree.Tree, vars []rtree.ReadVar) { 260 r, err := rtree.NewReader(t, vars) 261 if err != nil { 262 out <- treeEntry{err: err} 263 return 264 } 265 defer r.Close() 266 267 defer close(out) 268 269 next := make(chan int) 270 err = r.Read(func(ctx rtree.RCtx) error { 271 select { 272 case <-quit: 273 return io.EOF 274 case out <- treeEntry{err: nil, n: ctx.Entry, ok: next}: 275 <-next 276 return nil 277 } 278 }) 279 if err != nil { 280 out <- treeEntry{err: err} 281 } 282 }