github.com/yaricom/goNEAT@v0.0.0-20210507221059-e2110b885482/experiments/pole/cart2pole.go (about) 1 package pole 2 3 import ( 4 "github.com/yaricom/goNEAT/experiments" 5 "github.com/yaricom/goNEAT/neat/network" 6 "fmt" 7 "github.com/yaricom/goNEAT/neat" 8 "math" 9 "github.com/yaricom/goNEAT/neat/genetics" 10 "os" 11 "sort" 12 ) 13 14 const thirty_six_degrees = 36 * math.Pi / 180.0 15 16 // The maximal number of time steps for Markov experiment 17 const markov_max_steps = 100000 18 // The maximal number of time steps for Non-Markov long run 19 const non_markov_long_max_steps = 100000 20 // The maximal number of time steps for Non-Markov generalization run 21 const non_markov_generalization_max_steps = 1000 22 23 24 // The double pole-balancing experiment both Markov and non-Markov versions 25 type CartDoublePoleGenerationEvaluator struct { 26 // The output path to store execution results 27 OutputPath string 28 // The flag to indicate whether to apply Markov evaluation variant 29 Markov bool 30 31 // The flag to indicate whether to use continuous activation or discrete 32 ActionType experiments.ActionType 33 } 34 35 // The structure to describe cart pole emulation 36 type CartPole struct { 37 // The flag to indicate that we are executing Markov experiment setup (known velocities information) 38 isMarkov bool 39 // Flag that we are looking at the champion in Non-Markov experiment 40 nonMarkovLong bool 41 // Flag that we are testing champion's generalization 42 generalizationTest bool 43 44 // The state of the system (x, ∆x/∆t, θ1, ∆θ1/∆t, θ2, ∆θ2/∆t) 45 state [6]float64 46 47 // The number of balanced time steps passed for current organism evaluation 48 balanced_time_steps int 49 50 jiggleStep [1000]float64 51 52 // Queues used for Gruau's fitness which damps oscillations 53 54 cartpos_sum float64 55 cartv_sum float64 56 polepos_sum float64 57 polev_sum float64 58 } 59 60 // Perform evaluation of one epoch on double pole balancing 61 func (ex CartDoublePoleGenerationEvaluator) GenerationEvaluate(pop *genetics.Population, epoch *experiments.Generation, context *neat.NeatContext) (err error) { 62 cartPole := newCartPole(ex.Markov) 63 64 cartPole.nonMarkovLong = false 65 cartPole.generalizationTest = false 66 67 // Evaluate each organism on a test 68 for _, org := range pop.Organisms { 69 winner, err := ex.orgEvaluate(org, cartPole) 70 if err != nil { 71 return err 72 } 73 74 if winner && (epoch.Best == nil || org.Fitness > epoch.Best.Fitness){ 75 // This will be winner in Markov case 76 epoch.Solved = true 77 epoch.WinnerNodes = len(org.Genotype.Nodes) 78 epoch.WinnerGenes = org.Genotype.Extrons() 79 epoch.WinnerEvals = context.PopSize * epoch.Id + org.Genotype.Id 80 epoch.Best = org 81 org.IsWinner = true 82 } 83 } 84 85 // Check for winner in Non-Markov case 86 if !ex.Markov { 87 // The best individual (i.e. the one with the highest fitness value) of every generation is tested for 88 // its ability to balance the system for a longer time period. If a potential solution passes this test 89 // by keeping the system balanced for 100’000 time steps, the so called generalization score(GS) of this 90 // particular individual is calculated. This score measures the potential of a controller to balance the 91 // system starting from different initial conditions. It's calculated with a series of experiments, running 92 // over 1000 time steps, starting from 625 different initial conditions. 93 // The initial conditions are chosen by assigning each value of the set Ω = [0.05 0.25 0.5 0.75 0.95] to 94 // each of the states x, ∆x/∆t, θ1 and ∆θ1/∆t, scaled to the range of the variables.The short pole angle θ2 95 // and its angular velocity ∆θ2/∆t are set to zero. The GS is then defined as the number of successful runs 96 // from the 625 initial conditions and an individual is defined as a solution if it reaches a generalization 97 // score of 200 or more. 98 99 // Sort the species by max organism fitness in descending order - the highest fitness first 100 sorted_species := make([]*genetics.Species, len(pop.Species)) 101 copy(sorted_species, pop.Species) 102 sort.Sort(sort.Reverse(genetics.ByOrganismFitness(sorted_species))) 103 104 // First update what is checked and unchecked 105 var curr_species *genetics.Species 106 for _, curr_species = range sorted_species { 107 max, _ := curr_species.ComputeMaxAndAvgFitness() 108 if max > curr_species.MaxFitnessEver { 109 curr_species.IsChecked = false 110 } 111 } 112 113 // Now find first (most fit) species that is unchecked 114 curr_species = nil 115 for _, curr_species = range sorted_species { 116 if !curr_species.IsChecked { 117 break 118 } 119 } 120 if curr_species == nil { 121 curr_species = sorted_species[0] 122 } 123 124 // Remember it was checked 125 curr_species.IsChecked = true 126 127 // the organism champion 128 champion := curr_species.FindChampion() 129 champion_fitness := champion.Fitness 130 131 // Now check to make sure the champion can do 100'000 evaluations 132 cartPole.nonMarkovLong = true 133 cartPole.generalizationTest = false 134 135 longRunPassed, err := ex.orgEvaluate(champion, cartPole) 136 if err != nil { 137 return err 138 } 139 if longRunPassed { 140 141 // the champion passed non-Markov long test, start generalization 142 cartPole.nonMarkovLong = false 143 cartPole.generalizationTest = true 144 145 // Given that the champion passed long run test, now run it on generalization tests running 146 // over 1'000 time steps, starting from 625 different initial conditions 147 state_vals := [5]float64{0.05, 0.25, 0.5, 0.75, 0.95} 148 generalization_score := 0 149 for s0c := 0; s0c < 5; s0c++ { 150 for s1c := 0; s1c < 5; s1c++ { 151 for s2c := 0; s2c < 5; s2c++ { 152 for s3c := 0; s3c < 5; s3c++ { 153 cartPole.state[0] = state_vals[s0c] * 4.32 - 2.16 154 cartPole.state[1] = state_vals[s1c] * 2.70 - 1.35 155 cartPole.state[2] = state_vals[s2c] * 0.12566304 - 0.06283152 // 0.06283152 = 3.6 degrees 156 cartPole.state[3] = state_vals[s3c] * 0.30019504 - 0.15009752 // 0.15009752 = 8.6 degrees 157 // The short pole angle and its angular velocity are set to zero. 158 cartPole.state[4] = 0.0 159 cartPole.state[5] = 0.0 160 161 // The champion needs to be flushed here because it may have 162 // leftover activation from its last test run that could affect 163 // its recurrent memory 164 champion.Phenotype.Flush() 165 166 if generalized, err := ex.orgEvaluate(champion, cartPole); generalized { 167 generalization_score++ 168 169 if neat.LogLevel == neat.LogLevelDebug { 170 neat.DebugLog( 171 fmt.Sprintf("x: % f, xv: % f, t1: % f, t2: % f, angle: % f\n", 172 cartPole.state[0], cartPole.state[1], 173 cartPole.state[2], cartPole.state[4], thirty_six_degrees)) 174 } 175 } else if err != nil { 176 return err 177 } 178 } 179 } 180 } 181 } 182 183 if generalization_score >= 200 { 184 // The generalization test winner 185 neat.InfoLog( 186 fmt.Sprintf("The non-Markov champion found! (Generalization Score = %d)", 187 generalization_score)) 188 champion.Fitness = float64(generalization_score) 189 champion.IsWinner = true 190 epoch.Solved = true 191 epoch.WinnerNodes = len(champion.Genotype.Nodes) 192 epoch.WinnerGenes = champion.Genotype.Extrons() 193 epoch.WinnerEvals = context.PopSize * epoch.Id + champion.Genotype.Id 194 epoch.Best = champion 195 } else { 196 neat.InfoLog("The non-Markov champion unable to generalize") 197 champion.Fitness = champion_fitness // Restore the champ's fitness 198 champion.IsWinner = false 199 } 200 } else { 201 neat.InfoLog("The non-Markov champion missed the 100'000 run test") 202 champion.Fitness = champion_fitness // Restore the champ's fitness 203 champion.IsWinner = false 204 } 205 } 206 207 208 // Fill statistics about current epoch 209 epoch.FillPopulationStatistics(pop) 210 211 // Only print to file every print_every generations 212 if epoch.Solved || epoch.Id % context.PrintEvery == 0 { 213 pop_path := fmt.Sprintf("%s/gen_%d", experiments.OutDirForTrial(ex.OutputPath, epoch.TrialId), epoch.Id) 214 file, err := os.Create(pop_path) 215 if err != nil { 216 neat.ErrorLog(fmt.Sprintf("Failed to dump population, reason: %s\n", err)) 217 } else { 218 pop.WriteBySpecies(file) 219 } 220 } 221 222 if epoch.Solved { 223 // print winner organism 224 for _, org := range pop.Organisms { 225 if org.IsWinner { 226 // Prints the winner organism to file! 227 org_path := fmt.Sprintf("%s/%s_%.1f_%d-%d", experiments.OutDirForTrial(ex.OutputPath, epoch.TrialId), 228 "pole2_winner", org.Fitness, org.Phenotype.NodeCount(), org.Phenotype.LinkCount()) 229 file, err := os.Create(org_path) 230 if err != nil { 231 neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism genome, reason: %s\n", err)) 232 } else { 233 org.Genotype.Write(file) 234 neat.InfoLog(fmt.Sprintf("Generation #%d winner %d dumped to: %s\n", epoch.Id, org.Genotype.Id, org_path)) 235 } 236 break 237 } 238 } 239 } 240 241 return err 242 } 243 244 // This methods evaluates provided organism for cart double pole-balancing task 245 func (ex *CartDoublePoleGenerationEvaluator) orgEvaluate(organism *genetics.Organism, cartPole *CartPole) (winner bool, err error) { 246 // Try to balance a pole now 247 organism.Fitness, err = cartPole.evalNet(organism.Phenotype, ex.ActionType) 248 if err != nil { 249 return false, err 250 } 251 252 if neat.LogLevel == neat.LogLevelDebug { 253 neat.DebugLog(fmt.Sprintf("Organism #%3d\tfitness: %f", organism.Genotype.Id, organism.Fitness)) 254 } 255 256 // DEBUG CHECK if organism is damaged 257 if !(cartPole.nonMarkovLong && cartPole.generalizationTest) && organism.CheckChampionChildDamaged() { 258 neat.WarnLog(fmt.Sprintf("ORGANISM DAMAGED:\n%s", organism.Genotype)) 259 } 260 261 // Decide if its a winner, in Markov Case 262 if cartPole.isMarkov { 263 if organism.Fitness >= markov_max_steps { 264 winner = true 265 organism.Fitness = 1.0 266 organism.Error = 0.0 267 } else { 268 // we use linear scale 269 organism.Error = (markov_max_steps - organism.Fitness) / markov_max_steps 270 organism.Fitness = 1.0 - organism.Error 271 } 272 } else if cartPole.nonMarkovLong { 273 // if doing the long test non-markov 274 if organism.Fitness >= non_markov_long_max_steps { 275 winner = true 276 } 277 } else if cartPole.generalizationTest { 278 if organism.Fitness >= non_markov_generalization_max_steps { 279 winner = true 280 } 281 } else { 282 winner = false 283 } 284 return winner, err 285 } 286 287 288 // If markov is false, then velocity information will be withheld from the network population (non-Markov) 289 func newCartPole(markov bool) *CartPole { 290 return &CartPole { 291 isMarkov: markov, 292 } 293 } 294 295 func (cp *CartPole)evalNet(net *network.Network, actionType experiments.ActionType) (steps float64, err error) { 296 non_markov_max := non_markov_generalization_max_steps 297 if cp.nonMarkovLong { 298 non_markov_max = non_markov_long_max_steps 299 } 300 301 302 303 cp.resetState() 304 305 if cp.isMarkov { 306 input := make([]float64, 7) 307 for steps = 0; steps < markov_max_steps; steps++ { 308 input[0] = (cp.state[0] + 2.4) / 4.8 309 input[1] = (cp.state[1] + 1.0) / 2.0 310 input[2] = (cp.state[2] + thirty_six_degrees) / (thirty_six_degrees * 2.0)//0.52 311 input[3] = (cp.state[3] + 1.0) / 2.0 312 input[4] = (cp.state[4] + thirty_six_degrees) / (thirty_six_degrees * 2.0)//0.52 313 input[5] = (cp.state[5] + 1.0) / 2.0 314 input[6] = 0.5 315 316 net.LoadSensors(input) 317 318 /*-- activate the network based on the input --*/ 319 if res, err := net.Activate(); !res { 320 //If it loops, exit returning only fitness of 1 step 321 neat.DebugLog(fmt.Sprintf("Failed to activate Network, reason: %s", err)) 322 return 1.0, nil 323 } 324 action := net.Outputs[0].Activation 325 if actionType == experiments.DiscreteAction { 326 // make action values discrete 327 if action < 0.5 { 328 action = 0 329 } else { 330 action = 1 331 } 332 } 333 cp.performAction(action, steps) 334 335 if cp.outsideBounds() { 336 // if failure stop it now 337 break; 338 } 339 340 //fmt.Printf("x: % f, xv: % f, t1: % f, t2: % f, angle: % f\n", cp.state[0], cp.state[1], cp.state[2], cp.state[4], thirty_six_degrees) 341 } 342 return steps, nil 343 } else { 344 input := make([]float64, 4) 345 // The non Markov case 346 for steps = 0; steps < float64(non_markov_max); steps++ { 347 input[0] = cp.state[0] / 4.8 348 input[1] = cp.state[2] / 0.52 349 input[2] = cp.state[4] / 0.52 350 input[3] = 1.0 351 352 err = net.LoadSensors(input) 353 if err != nil { 354 return 0, err 355 } 356 357 /*-- activate the network based on the input --*/ 358 if res, err := net.Activate(); !res { 359 // If it loops, exit returning only fitness of 1 step 360 neat.WarnLog(fmt.Sprintf("Failed to activate Network, reason: %s", err)) 361 return 0.0001, err 362 } 363 364 action := net.Outputs[0].Activation 365 if actionType == experiments.DiscreteAction { 366 // make action values discrete 367 if action < 0.5 { 368 action = 0 369 } else { 370 action = 1 371 } 372 } 373 cp.performAction(action, steps) 374 if cp.outsideBounds() { 375 //fmt.Printf("x: % f, xv: % f, t1: % f, t2: % f, angle: % f, steps: %f\n", 376 // cp.state[0], cp.state[1], cp.state[2], cp.state[4], thirty_six_degrees, steps) 377 // if failure stop it now 378 break; 379 } 380 381 382 } 383 /*-- If we are generalizing we just need to balance it for a while --*/ 384 if cp.generalizationTest { 385 return float64(cp.balanced_time_steps), nil 386 } 387 388 // Sum last 100 389 jiggle_total := 0.0 390 if steps >= 100.0 && !cp.nonMarkovLong { 391 // Adjust for array bounds and count 392 for count := int(steps - 100.0); count < int(steps); count++ { 393 jiggle_total += cp.jiggleStep[count] 394 } 395 } 396 if !cp.nonMarkovLong { 397 var non_markov_fitness float64 398 if cp.balanced_time_steps >= 100 { 399 // F = 0.1f1 + 0.9f2 400 non_markov_fitness = 0.1 * float64(cp.balanced_time_steps) / 1000.0 + 0.9 * 0.75 / float64(jiggle_total) 401 } else { 402 // F = t / 1000 403 non_markov_fitness = 0.1 * float64(cp.balanced_time_steps) / 1000.0 404 } 405 if neat.LogLevel == neat.LogLevelDebug { 406 neat.DebugLog(fmt.Sprintf("Balanced time steps: %d, jiggle: %f ***\n", 407 cp.balanced_time_steps, jiggle_total)) 408 } 409 return non_markov_fitness, nil 410 } else { 411 return steps, nil 412 } 413 } 414 } 415 416 func (cp *CartPole) performAction(action, step_num float64) { 417 const TAU = 0.01 // ∆t = 0.01s 418 419 /*--- Apply action to the simulated cart-pole ---*/ 420 // Runge-Kutta 4th order integration method 421 var dydx[6]float64 422 for i := 0; i < 2; i++ { 423 dydx[0] = cp.state[1] 424 dydx[2] = cp.state[3] 425 dydx[4] = cp.state[5] 426 cp.step(action, cp.state, &dydx) 427 cp.rk4(action, cp.state, dydx, &cp.state, TAU) 428 } 429 430 // Record this state 431 cp.cartpos_sum += math.Abs(cp.state[0]) 432 cp.cartv_sum += math.Abs(cp.state[1]); 433 cp.polepos_sum += math.Abs(cp.state[2]); 434 cp.polev_sum += math.Abs(cp.state[3]); 435 436 if step_num < 1000 { 437 cp.jiggleStep[int(step_num)] = math.Abs(cp.state[0]) + math.Abs(cp.state[1]) + math.Abs(cp.state[2]) + math.Abs(cp.state[3]) 438 } 439 if !cp.outsideBounds() { 440 cp.balanced_time_steps++ 441 } 442 } 443 444 func (cp *CartPole) step(action float64, st[6]float64, derivs *[6]float64) { 445 const MUP = 0.000002 446 const GRAVITY = -9.8 447 const FORCE_MAG = 10.0 // [N] 448 const MASS_CART = 1.0 // [kg] 449 450 const MASS_POLE_1 = 1.0 // [kg] 451 const LENGTH_1 = 0.5 // [m] - actually half the first pole's length 452 453 const LENGTH_2 = 0.05 // [m] - actually half the second pole's length 454 const MASS_POLE_2 = 0.1 // [kg] 455 456 force := (action - 0.5) * FORCE_MAG * 2.0 457 cos_theta_1 := math.Cos(st[2]) 458 sin_theta_1 := math.Sin(st[2]) 459 g_sin_theta_1 := GRAVITY * sin_theta_1 460 cos_theta_2 := math.Cos(st[4]) 461 sin_theta_2 := math.Sin(st[4]) 462 g_sin_theta_2 := GRAVITY * sin_theta_2 463 464 ml_1 := LENGTH_1 * MASS_POLE_1 465 ml_2 := LENGTH_2 * MASS_POLE_2 466 temp_1 := MUP * st[3] / ml_1 467 temp_2 := MUP * st[5] / ml_2 468 fi_1 := (ml_1 * st[3] * st[3] * sin_theta_1) + (0.75 * MASS_POLE_1 * cos_theta_1 * (temp_1 + g_sin_theta_1)) 469 fi_2 := (ml_2 * st[5] * st[5] * sin_theta_2) + (0.75 * MASS_POLE_2 * cos_theta_2 * (temp_2 + g_sin_theta_2)) 470 mi_1 := MASS_POLE_1 * (1 - (0.75 * cos_theta_1 * cos_theta_1)) 471 mi_2 := MASS_POLE_2 * (1 - (0.75 * cos_theta_2 * cos_theta_2)) 472 473 //fmt.Printf("%f -> %f\n", action, force) 474 475 derivs[1] = (force + fi_1 + fi_2) / (mi_1 + mi_2 + MASS_CART) 476 derivs[3] = -0.75 * (derivs[1] * cos_theta_1 + g_sin_theta_1 + temp_1) / LENGTH_1 477 derivs[5] = -0.75 * (derivs[1] * cos_theta_2 + g_sin_theta_2 + temp_2) / LENGTH_2 478 } 479 480 func (cp *CartPole) rk4(f float64, y, dydx [6]float64, yout *[6]float64, tau float64) { 481 var yt, dym, dyt [6]float64 482 hh := tau * 0.5 483 h6 := tau / 6.0 484 for i := 0; i <= 5; i++ { 485 yt[i] = y[i] + hh * dydx[i] 486 } 487 cp.step(f, yt, &dyt) 488 489 dyt[0] = yt[1] 490 dyt[2] = yt[3] 491 dyt[4] = yt[5] 492 for i := 0; i <= 5; i++ { 493 yt[i] = y[i] + hh * dyt[i] 494 } 495 cp.step(f, yt, &dym) 496 497 dym[0] = yt[1] 498 dym[2] = yt[3] 499 dym[4] = yt[5] 500 for i := 0; i <= 5; i++ { 501 yt[i] = y[i] + tau * dym[i] 502 dym[i] += dyt[i] 503 } 504 cp.step(f, yt, &dyt) 505 506 dyt[0] = yt[1] 507 dyt[2] = yt[3] 508 dyt[4] = yt[5] 509 for i := 0; i <= 5; i++ { 510 yout[i] = y[i] + h6 * (dydx[i] + dyt[i] + 2.0 * dym[i]) 511 } 512 } 513 514 // Check if simulation goes outside of bounds 515 func (cp *CartPole) outsideBounds() bool { 516 const failureAngle = thirty_six_degrees 517 518 return cp.state[0] < -2.4 || 519 cp.state[0] > 2.4 || 520 cp.state[2] < -failureAngle || 521 cp.state[2] > failureAngle || 522 cp.state[4] < -failureAngle || 523 cp.state[4] > failureAngle 524 } 525 526 func (cp *CartPole)resetState() { 527 if cp.isMarkov { 528 // Clear all fitness records 529 cp.cartpos_sum = 0.0 530 cp.cartv_sum = 0.0 531 cp.polepos_sum = 0.0 532 cp.polev_sum = 0.0 533 534 cp.state[0], cp.state[1], cp.state[2], cp.state[3], cp.state[4], cp.state[5] = 0, 0, 0, 0, 0, 0 535 } else if !cp.generalizationTest { 536 // The long run non-markov test 537 cp.state[0], cp.state[1], cp.state[3], cp.state[4], cp.state[5] = 0, 0, 0, 0, 0 538 cp.state[2] = math.Pi / 180.0 // one_degree 539 } 540 cp.balanced_time_steps = 0 // Always count # of balanced time steps 541 } 542 543