github.com/puellanivis/breton@v0.2.16/lib/mapreduce/engine.go (about) 1 package mapreduce 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "sync" 8 ) 9 10 type engine struct { 11 MapReduce 12 } 13 14 // threadCount returns the valid threadCount value to use based on configuration. 15 // It guards against invalid values. 16 func (e *engine) threadCount() int { 17 n := e.conf.threadCount 18 19 if n < 1 { 20 n = DefaultThreadCount 21 22 if n < 1 { 23 // Even if the package-level Default was set to less than one, 24 // we need to ensure it is at least one. 25 n = 1 26 } 27 28 e.conf.threadCount = n 29 } 30 31 return n 32 } 33 34 func quickError(err error) <-chan error { 35 errch := make(chan error, 1) 36 37 if err != nil { 38 errch <- err 39 } 40 41 close(errch) 42 return errch 43 } 44 45 func (e *engine) run(ctx context.Context, rng Range) <-chan error { 46 width := rng.Width() 47 if width < 1 { 48 return quickError(errors.New("bad range")) 49 } 50 51 threads := e.threadCount() 52 53 mappers := e.conf.mapperCount 54 if mappers < 1 { 55 mappers = threads 56 } 57 58 stripe := width / mappers 59 extraWork := width % mappers // How many mappers need one more element in order to cover the whole width. 60 61 switch { 62 case e.conf.stripeSize > 0: 63 maxSize := e.conf.stripeSize 64 65 // We need to calculate the stripe size for an extra-work mapper, if there are extra-work mappers. 66 maxWorkSize := stripe 67 if extraWork > 0 { 68 maxWorkSize++ 69 } 70 71 if maxWorkSize > maxSize { 72 // We only recalculate mapper count if the stripe size is greater than the max stripe size. 73 stripe = maxSize 74 extraWork = 0 75 76 // Here, the math is simple, but the code is complex. 77 // 78 // Our mapper count is ⌈width ÷ stripe⌉, 79 // but integer math on computers gives ⌊width ÷ stripe⌋. 80 mappers = width / stripe 81 82 if width%stripe > 0 { 83 // So, if the work does not split up exactly, so we need another mapper. 84 mappers++ 85 86 // And now, we may as well just recalculate the whole coverage anew… just to be sure. 87 stripe = width / mappers 88 extraWork = width % mappers 89 } 90 } 91 92 case e.conf.stripeSize < 0: 93 minSize := -e.conf.stripeSize 94 95 // stripe is already the smallest work size. 96 97 if stripe < minSize { 98 // We only recalculate mapper count if the stripe size is less than the min stripe size. 99 stripe = minSize 100 101 // Here, the math is simple, and the code is simple. 102 // 103 // Our mapper count is ⌊width ÷ stripe⌋. 104 mappers = width / stripe 105 106 // Now we just need to recalculate the extra coverage. 107 extraWork = width % mappers 108 } 109 } 110 111 var reducerMutex sync.Mutex 112 pool := newThreadPool(threads) 113 chain := newExecChain(e.conf.ordered) 114 115 var wg sync.WaitGroup 116 wg.Add(mappers) 117 errch := make(chan error, mappers) 118 119 go func() { 120 wg.Wait() 121 close(errch) 122 }() 123 124 last := rng.Start 125 for i := 0; i < mappers; i++ { 126 start := last 127 end := start + stripe 128 129 if i < extraWork { 130 end++ 131 } 132 133 if end > rng.End { 134 end = rng.End 135 } 136 last = end 137 138 ready, next := chain.next() 139 140 go func() { 141 defer func() { 142 wg.Done() 143 if next != nil { 144 close(next) 145 } 146 }() 147 148 rng := Range{ 149 Start: start, 150 End: end, 151 } 152 153 if err := pool.wait(ctx); err != nil { 154 errch <- err 155 return 156 } 157 158 out, err := e.m.Map(ctx, rng) 159 if err != nil { 160 errch <- err 161 return 162 } 163 164 if err := pool.done(ctx); err != nil { 165 errch <- err 166 return 167 } 168 169 if out == nil || e.r == nil { 170 return 171 } 172 173 select { 174 case <-ready: 175 case <-ctx.Done(): 176 errch <- ctx.Err() 177 return 178 } 179 180 reducerMutex.Lock() 181 defer reducerMutex.Unlock() 182 183 // Our context may have expired waiting for mutex, so check again. 184 select { 185 case <-ctx.Done(): 186 errch <- ctx.Err() 187 return 188 default: 189 } 190 191 if err := e.r.Reduce(ctx, out); err != nil { 192 errch <- err 193 } 194 }() 195 } 196 197 if last != rng.End { 198 panic(fmt.Errorf("dropped entries! %d != %d", last, rng.End)) 199 } 200 201 return errch 202 }