github.com/yaricom/goNEAT@v0.0.0-20210507221059-e2110b885482/experiments/pole/cart2pole.go

github.com/yaricom/goNEAT@v0.0.0-20210507221059-e2110b885482/experiments/pole/cart2pole.go (about)

     1  package pole
     2  
     3  import (
     4  	"github.com/yaricom/goNEAT/experiments"
     5  	"github.com/yaricom/goNEAT/neat/network"
     6  	"fmt"
     7  	"github.com/yaricom/goNEAT/neat"
     8  	"math"
     9  	"github.com/yaricom/goNEAT/neat/genetics"
    10  	"os"
    11  	"sort"
    12  )
    13  
    14  const thirty_six_degrees = 36 * math.Pi / 180.0
    15  
    16  // The maximal number of time steps for Markov experiment
    17  const markov_max_steps = 100000
    18  // The maximal number of time steps for Non-Markov long run
    19  const non_markov_long_max_steps = 100000
    20  // The maximal number of time steps for Non-Markov generalization run
    21  const non_markov_generalization_max_steps = 1000
    22  
    23  
    24  // The double pole-balancing experiment both Markov and non-Markov versions
    25  type CartDoublePoleGenerationEvaluator struct {
    26  	// The output path to store execution results
    27  	OutputPath string
    28  	// The flag to indicate whether to apply Markov evaluation variant
    29  	Markov     bool
    30  
    31  	// The flag to indicate whether to use continuous activation or discrete
    32  	ActionType experiments.ActionType
    33  }
    34  
    35  // The structure to describe cart pole emulation
    36  type CartPole struct {
    37  	// The flag to indicate that we are executing Markov experiment setup (known velocities information)
    38  	isMarkov            bool
    39  	// Flag that we are looking at the champion in Non-Markov experiment
    40  	nonMarkovLong       bool
    41  	// Flag that we are testing champion's generalization
    42  	generalizationTest  bool
    43  
    44  	// The state of the system (x, ∆x/∆t, θ1, ∆θ1/∆t, θ2, ∆θ2/∆t)
    45  	state               [6]float64
    46  
    47  	// The number of balanced time steps passed for current organism evaluation
    48  	balanced_time_steps int
    49  
    50  	jiggleStep          [1000]float64
    51  
    52  	// Queues used for Gruau's fitness which damps oscillations
    53  
    54  	cartpos_sum         float64
    55  	cartv_sum           float64
    56  	polepos_sum         float64
    57  	polev_sum           float64
    58  }
    59  
    60  // Perform evaluation of one epoch on double pole balancing
    61  func (ex CartDoublePoleGenerationEvaluator) GenerationEvaluate(pop *genetics.Population, epoch *experiments.Generation, context *neat.NeatContext) (err error) {
    62  	cartPole := newCartPole(ex.Markov)
    63  
    64  	cartPole.nonMarkovLong = false
    65  	cartPole.generalizationTest = false
    66  
    67  	// Evaluate each organism on a test
    68  	for _, org := range pop.Organisms {
    69  		winner, err := ex.orgEvaluate(org, cartPole)
    70  		if err != nil {
    71  			return err
    72  		}
    73  
    74  		if winner && (epoch.Best == nil || org.Fitness > epoch.Best.Fitness){
    75  			// This will be winner in Markov case
    76  			epoch.Solved = true
    77  			epoch.WinnerNodes = len(org.Genotype.Nodes)
    78  			epoch.WinnerGenes = org.Genotype.Extrons()
    79  			epoch.WinnerEvals = context.PopSize * epoch.Id + org.Genotype.Id
    80  			epoch.Best = org
    81  			org.IsWinner = true
    82  		}
    83  	}
    84  
    85  	// Check for winner in Non-Markov case
    86  	if !ex.Markov {
    87  		// The best individual (i.e. the one with the highest fitness value) of every generation is tested for
    88  		// its ability to balance the system for a longer time period. If a potential solution passes this test
    89  		// by keeping the system balanced for 100’000 time steps, the so called generalization score(GS) of this
    90  		// particular individual is calculated. This score measures the potential of a controller to balance the
    91  		// system starting from different initial conditions. It's calculated with a series of experiments, running
    92  		// over 1000 time steps, starting from 625 different initial conditions.
    93  		// The initial conditions are chosen by assigning each value of the set Ω = [0.05 0.25 0.5 0.75 0.95] to
    94  		// each of the states x, ∆x/∆t, θ1 and ∆θ1/∆t, scaled to the range of the variables.The short pole angle θ2 
    95  		// and its angular velocity ∆θ2/∆t are set to zero. The GS is then defined as the number of successful runs 
    96  		// from the 625 initial conditions and an individual is defined as a solution if it reaches a generalization 
    97  		// score of 200 or more.
    98  
    99  		// Sort the species by max organism fitness in descending order - the highest fitness first
   100  		sorted_species := make([]*genetics.Species, len(pop.Species))
   101  		copy(sorted_species, pop.Species)
   102  		sort.Sort(sort.Reverse(genetics.ByOrganismFitness(sorted_species)))
   103  
   104  		// First update what is checked and unchecked
   105  		var curr_species *genetics.Species
   106  		for _, curr_species = range sorted_species {
   107  			max, _ := curr_species.ComputeMaxAndAvgFitness()
   108  			if max > curr_species.MaxFitnessEver {
   109  				curr_species.IsChecked = false
   110  			}
   111  		}
   112  
   113  		// Now find first (most fit) species that is unchecked
   114  		curr_species = nil
   115  		for _, curr_species = range sorted_species {
   116  			if !curr_species.IsChecked {
   117  				break
   118  			}
   119  		}
   120  		if curr_species == nil {
   121  			curr_species = sorted_species[0]
   122  		}
   123  
   124  		// Remember it was checked
   125  		curr_species.IsChecked = true
   126  
   127  		// the organism champion
   128  		champion := curr_species.FindChampion()
   129  		champion_fitness := champion.Fitness
   130  
   131  		// Now check to make sure the champion can do 100'000 evaluations
   132  		cartPole.nonMarkovLong = true
   133  		cartPole.generalizationTest = false
   134  
   135  		longRunPassed, err := ex.orgEvaluate(champion, cartPole)
   136  		if err != nil {
   137  			return err
   138  		}
   139  		if longRunPassed {
   140  
   141  			// the champion passed non-Markov long test, start generalization
   142  			cartPole.nonMarkovLong = false
   143  			cartPole.generalizationTest = true
   144  
   145  			// Given that the champion passed long run test, now run it on generalization tests running
   146  			// over 1'000 time steps, starting from 625 different initial conditions
   147  			state_vals := [5]float64{0.05, 0.25, 0.5, 0.75, 0.95}
   148  			generalization_score := 0
   149  			for s0c := 0; s0c < 5; s0c++ {
   150  				for s1c := 0; s1c < 5; s1c++ {
   151  					for s2c := 0; s2c < 5; s2c++ {
   152  						for s3c := 0; s3c < 5; s3c++ {
   153  							cartPole.state[0] = state_vals[s0c] * 4.32 - 2.16
   154  							cartPole.state[1] = state_vals[s1c] * 2.70 - 1.35
   155  							cartPole.state[2] = state_vals[s2c] * 0.12566304 - 0.06283152 // 0.06283152 = 3.6 degrees
   156  							cartPole.state[3] = state_vals[s3c] * 0.30019504 - 0.15009752 // 0.15009752 = 8.6 degrees
   157  							// The short pole angle and its angular velocity are set to zero.
   158  							cartPole.state[4] = 0.0
   159  							cartPole.state[5] = 0.0
   160  
   161  							// The champion needs to be flushed here because it may have
   162  							// leftover activation from its last test run that could affect
   163  							// its recurrent memory
   164  							champion.Phenotype.Flush()
   165  
   166  							if generalized, err := ex.orgEvaluate(champion, cartPole); generalized {
   167  								generalization_score++
   168  
   169  								if neat.LogLevel == neat.LogLevelDebug {
   170  									neat.DebugLog(
   171  										fmt.Sprintf("x: % f, xv: % f, t1: % f, t2: % f, angle: % f\n",
   172  										cartPole.state[0], cartPole.state[1],
   173  										cartPole.state[2], cartPole.state[4], thirty_six_degrees))
   174  								}
   175  							} else if err != nil {
   176  								return err
   177  							}
   178  						}
   179  					}
   180  				}
   181  			}
   182  
   183  			if generalization_score >= 200 {
   184  				// The generalization test winner
   185  				neat.InfoLog(
   186  					fmt.Sprintf("The non-Markov champion found! (Generalization Score = %d)",
   187  						generalization_score))
   188  				champion.Fitness = float64(generalization_score)
   189  				champion.IsWinner = true
   190  				epoch.Solved = true
   191  				epoch.WinnerNodes = len(champion.Genotype.Nodes)
   192  				epoch.WinnerGenes = champion.Genotype.Extrons()
   193  				epoch.WinnerEvals = context.PopSize * epoch.Id + champion.Genotype.Id
   194  				epoch.Best = champion
   195  			} else {
   196  				neat.InfoLog("The non-Markov champion unable to generalize")
   197  				champion.Fitness = champion_fitness // Restore the champ's fitness
   198  				champion.IsWinner = false
   199  			}
   200  		} else {
   201  			neat.InfoLog("The non-Markov champion missed the 100'000 run test")
   202  			champion.Fitness = champion_fitness // Restore the champ's fitness
   203  			champion.IsWinner = false
   204  		}
   205  	}
   206  
   207  
   208  	// Fill statistics about current epoch
   209  	epoch.FillPopulationStatistics(pop)
   210  
   211  	// Only print to file every print_every generations
   212  	if epoch.Solved || epoch.Id % context.PrintEvery == 0 {
   213  		pop_path := fmt.Sprintf("%s/gen_%d", experiments.OutDirForTrial(ex.OutputPath, epoch.TrialId), epoch.Id)
   214  		file, err := os.Create(pop_path)
   215  		if err != nil {
   216  			neat.ErrorLog(fmt.Sprintf("Failed to dump population, reason: %s\n", err))
   217  		} else {
   218  			pop.WriteBySpecies(file)
   219  		}
   220  	}
   221  
   222  	if epoch.Solved {
   223  		// print winner organism
   224  		for _, org := range pop.Organisms {
   225  			if org.IsWinner {
   226  				// Prints the winner organism to file!
   227  				org_path := fmt.Sprintf("%s/%s_%.1f_%d-%d", experiments.OutDirForTrial(ex.OutputPath, epoch.TrialId),
   228  					"pole2_winner", org.Fitness, org.Phenotype.NodeCount(), org.Phenotype.LinkCount())
   229  				file, err := os.Create(org_path)
   230  				if err != nil {
   231  					neat.ErrorLog(fmt.Sprintf("Failed to dump winner organism genome, reason: %s\n", err))
   232  				} else {
   233  					org.Genotype.Write(file)
   234  					neat.InfoLog(fmt.Sprintf("Generation #%d winner %d dumped to: %s\n", epoch.Id, org.Genotype.Id, org_path))
   235  				}
   236  				break
   237  			}
   238  		}
   239  	}
   240  
   241  	return err
   242  }
   243  
   244  // This methods evaluates provided organism for cart double pole-balancing task
   245  func (ex *CartDoublePoleGenerationEvaluator) orgEvaluate(organism *genetics.Organism, cartPole *CartPole) (winner bool, err error) {
   246  	// Try to balance a pole now
   247  	organism.Fitness, err = cartPole.evalNet(organism.Phenotype, ex.ActionType)
   248  	if err != nil {
   249  		return false, err
   250  	}
   251  
   252  	if neat.LogLevel == neat.LogLevelDebug {
   253  		neat.DebugLog(fmt.Sprintf("Organism #%3d\tfitness: %f", organism.Genotype.Id, organism.Fitness))
   254  	}
   255  
   256  	// DEBUG CHECK if organism is damaged
   257  	if !(cartPole.nonMarkovLong && cartPole.generalizationTest) && organism.CheckChampionChildDamaged() {
   258  		neat.WarnLog(fmt.Sprintf("ORGANISM DAMAGED:\n%s", organism.Genotype))
   259  	}
   260  
   261  	// Decide if its a winner, in Markov Case
   262  	if cartPole.isMarkov {
   263  		if organism.Fitness >= markov_max_steps {
   264  			winner = true
   265  			organism.Fitness = 1.0
   266  			organism.Error = 0.0
   267  		} else {
   268  			// we use linear scale
   269  			organism.Error = (markov_max_steps - organism.Fitness) / markov_max_steps
   270  			organism.Fitness = 1.0 - organism.Error
   271  		}
   272  	} else if cartPole.nonMarkovLong {
   273  		// if doing the long test non-markov
   274  		if organism.Fitness >= non_markov_long_max_steps {
   275  			winner = true
   276  		}
   277  	} else if cartPole.generalizationTest {
   278  		if organism.Fitness >= non_markov_generalization_max_steps {
   279  			winner = true
   280  		}
   281  	} else {
   282  		winner = false
   283  	}
   284  	return winner, err
   285  }
   286  
   287  
   288  // If markov is false, then velocity information will be withheld from the network population (non-Markov)
   289  func newCartPole(markov bool) *CartPole {
   290  	return &CartPole {
   291  		isMarkov: markov,
   292  	}
   293  }
   294  
   295  func (cp *CartPole)evalNet(net *network.Network, actionType experiments.ActionType) (steps float64, err error) {
   296  	non_markov_max := non_markov_generalization_max_steps
   297  	if cp.nonMarkovLong {
   298  		non_markov_max = non_markov_long_max_steps
   299  	}
   300  
   301  
   302  
   303  	cp.resetState()
   304  
   305  	if cp.isMarkov {
   306  		input := make([]float64, 7)
   307  		for steps = 0; steps < markov_max_steps; steps++ {
   308  			input[0] = (cp.state[0] + 2.4) / 4.8
   309  			input[1] = (cp.state[1] + 1.0) / 2.0
   310  			input[2] = (cp.state[2] + thirty_six_degrees) / (thirty_six_degrees * 2.0)//0.52
   311  			input[3] = (cp.state[3] + 1.0) / 2.0
   312  			input[4] = (cp.state[4] + thirty_six_degrees) / (thirty_six_degrees * 2.0)//0.52
   313  			input[5] = (cp.state[5] + 1.0) / 2.0
   314  			input[6] = 0.5
   315  
   316  			net.LoadSensors(input)
   317  
   318  			/*-- activate the network based on the input --*/
   319  			if res, err := net.Activate(); !res {
   320  				//If it loops, exit returning only fitness of 1 step
   321  				neat.DebugLog(fmt.Sprintf("Failed to activate Network, reason: %s", err))
   322  				return 1.0, nil
   323  			}
   324  			action := net.Outputs[0].Activation
   325  			if actionType == experiments.DiscreteAction {
   326  				// make action values discrete
   327  				if action < 0.5 {
   328  					action = 0
   329  				} else {
   330  					action = 1
   331  				}
   332  			}
   333  			cp.performAction(action, steps)
   334  
   335  			if cp.outsideBounds() {
   336  				// if failure stop it now
   337  				break;
   338  			}
   339  
   340  			//fmt.Printf("x: % f, xv: % f, t1: % f, t2: % f, angle: % f\n", cp.state[0], cp.state[1], cp.state[2], cp.state[4], thirty_six_degrees)
   341  		}
   342  		return steps, nil
   343  	} else {
   344  		input := make([]float64, 4)
   345  		// The non Markov case
   346  		for steps = 0; steps < float64(non_markov_max); steps++ {
   347  			input[0] = cp.state[0] / 4.8
   348  			input[1] = cp.state[2] / 0.52
   349  			input[2] = cp.state[4] / 0.52
   350  			input[3] = 1.0
   351  
   352  			err = net.LoadSensors(input)
   353  			if err != nil {
   354  				return 0, err
   355  			}
   356  
   357  			/*-- activate the network based on the input --*/
   358  			if res, err := net.Activate(); !res {
   359  				// If it loops, exit returning only fitness of 1 step
   360  				neat.WarnLog(fmt.Sprintf("Failed to activate Network, reason: %s", err))
   361  				return 0.0001, err
   362  			}
   363  
   364  			action := net.Outputs[0].Activation
   365  			if actionType == experiments.DiscreteAction {
   366  				// make action values discrete
   367  				if action < 0.5 {
   368  					action = 0
   369  				} else {
   370  					action = 1
   371  				}
   372  			}
   373  			cp.performAction(action, steps)
   374  			if cp.outsideBounds() {
   375  				//fmt.Printf("x: % f, xv: % f, t1: % f, t2: % f, angle: % f, steps: %f\n",
   376  				//	cp.state[0], cp.state[1], cp.state[2], cp.state[4], thirty_six_degrees, steps)
   377  				// if failure stop it now
   378  				break;
   379  			}
   380  
   381  
   382  		}
   383  		/*-- If we are generalizing we just need to balance it for a while --*/
   384  		if cp.generalizationTest {
   385  			return float64(cp.balanced_time_steps), nil
   386  		}
   387  
   388  		// Sum last 100
   389  		jiggle_total := 0.0
   390  		if steps >= 100.0 && !cp.nonMarkovLong {
   391  			// Adjust for array bounds and count
   392  			for count := int(steps - 100.0); count < int(steps); count++ {
   393  				jiggle_total += cp.jiggleStep[count]
   394  			}
   395  		}
   396  		if !cp.nonMarkovLong {
   397  			var non_markov_fitness float64
   398  			if cp.balanced_time_steps >= 100 {
   399  				// F = 0.1f1 + 0.9f2
   400  				non_markov_fitness = 0.1 * float64(cp.balanced_time_steps) / 1000.0 + 0.9 * 0.75 / float64(jiggle_total)
   401  			} else {
   402  				// F = t / 1000
   403  				non_markov_fitness = 0.1 * float64(cp.balanced_time_steps) / 1000.0
   404  			}
   405  			if neat.LogLevel == neat.LogLevelDebug {
   406  				neat.DebugLog(fmt.Sprintf("Balanced time steps: %d, jiggle: %f ***\n",
   407  					cp.balanced_time_steps, jiggle_total))
   408  			}
   409  			return non_markov_fitness, nil
   410  		} else {
   411  			return steps, nil
   412  		}
   413  	}
   414  }
   415  
   416  func (cp *CartPole) performAction(action, step_num float64) {
   417  	const TAU = 0.01 // ∆t = 0.01s
   418  
   419  	/*--- Apply action to the simulated cart-pole ---*/
   420  	// Runge-Kutta 4th order integration method
   421  	var dydx[6]float64
   422  	for i := 0; i < 2; i++ {
   423  		dydx[0] = cp.state[1]
   424  		dydx[2] = cp.state[3]
   425  		dydx[4] = cp.state[5]
   426  		cp.step(action, cp.state, &dydx)
   427  		cp.rk4(action, cp.state, dydx, &cp.state, TAU)
   428  	}
   429  
   430  	// Record this state
   431  	cp.cartpos_sum += math.Abs(cp.state[0])
   432  	cp.cartv_sum += math.Abs(cp.state[1]);
   433  	cp.polepos_sum += math.Abs(cp.state[2]);
   434  	cp.polev_sum += math.Abs(cp.state[3]);
   435  
   436  	if step_num < 1000 {
   437  		cp.jiggleStep[int(step_num)] = math.Abs(cp.state[0]) + math.Abs(cp.state[1]) + math.Abs(cp.state[2]) + math.Abs(cp.state[3])
   438  	}
   439  	if !cp.outsideBounds() {
   440  		cp.balanced_time_steps++
   441  	}
   442  }
   443  
   444  func (cp *CartPole) step(action float64, st[6]float64, derivs *[6]float64) {
   445  	const MUP = 0.000002
   446  	const GRAVITY = -9.8
   447  	const FORCE_MAG = 10.0       // [N]
   448  	const MASS_CART = 1.0        // [kg]
   449  
   450  	const MASS_POLE_1 = 1.0      // [kg]
   451  	const LENGTH_1 = 0.5         // [m] - actually half the first pole's length
   452  
   453  	const LENGTH_2 = 0.05        // [m] - actually half the second pole's length
   454  	const MASS_POLE_2 = 0.1      // [kg]
   455  
   456  	force := (action - 0.5) * FORCE_MAG * 2.0
   457  	cos_theta_1 := math.Cos(st[2])
   458  	sin_theta_1 := math.Sin(st[2])
   459  	g_sin_theta_1 := GRAVITY * sin_theta_1
   460  	cos_theta_2 := math.Cos(st[4])
   461  	sin_theta_2 := math.Sin(st[4])
   462  	g_sin_theta_2 := GRAVITY * sin_theta_2
   463  
   464  	ml_1 := LENGTH_1 * MASS_POLE_1
   465  	ml_2 := LENGTH_2 * MASS_POLE_2
   466  	temp_1 := MUP * st[3] / ml_1
   467  	temp_2 := MUP * st[5] / ml_2
   468  	fi_1 := (ml_1 * st[3] * st[3] * sin_theta_1) + (0.75 * MASS_POLE_1 * cos_theta_1 * (temp_1 + g_sin_theta_1))
   469  	fi_2 := (ml_2 * st[5] * st[5] * sin_theta_2) + (0.75 * MASS_POLE_2 * cos_theta_2 * (temp_2 + g_sin_theta_2))
   470  	mi_1 := MASS_POLE_1 * (1 - (0.75 * cos_theta_1 * cos_theta_1))
   471  	mi_2 := MASS_POLE_2 * (1 - (0.75 * cos_theta_2 * cos_theta_2))
   472  
   473  	//fmt.Printf("%f -> %f\n", action, force)
   474  
   475  	derivs[1] = (force + fi_1 + fi_2) / (mi_1 + mi_2 + MASS_CART)
   476  	derivs[3] = -0.75 * (derivs[1] * cos_theta_1 + g_sin_theta_1 + temp_1) / LENGTH_1
   477  	derivs[5] = -0.75 * (derivs[1] * cos_theta_2 + g_sin_theta_2 + temp_2) / LENGTH_2
   478  }
   479  
   480  func (cp *CartPole) rk4(f float64, y, dydx [6]float64, yout *[6]float64, tau float64) {
   481  	var yt, dym, dyt [6]float64
   482  	hh := tau * 0.5
   483  	h6 := tau / 6.0
   484  	for i := 0; i <= 5; i++ {
   485  		yt[i] = y[i] + hh * dydx[i]
   486  	}
   487  	cp.step(f, yt, &dyt)
   488  
   489  	dyt[0] = yt[1]
   490  	dyt[2] = yt[3]
   491  	dyt[4] = yt[5]
   492  	for i := 0; i <= 5; i++ {
   493  		yt[i] = y[i] + hh * dyt[i]
   494  	}
   495  	cp.step(f, yt, &dym)
   496  
   497  	dym[0] = yt[1]
   498  	dym[2] = yt[3]
   499  	dym[4] = yt[5]
   500  	for i := 0; i <= 5; i++ {
   501  		yt[i] = y[i] + tau * dym[i]
   502  		dym[i] += dyt[i]
   503  	}
   504  	cp.step(f, yt, &dyt)
   505  
   506  	dyt[0] = yt[1]
   507  	dyt[2] = yt[3]
   508  	dyt[4] = yt[5]
   509  	for i := 0; i <= 5; i++ {
   510  		yout[i] = y[i] + h6 * (dydx[i] + dyt[i] + 2.0 * dym[i])
   511  	}
   512  }
   513  
   514  // Check if simulation goes outside of bounds
   515  func (cp *CartPole) outsideBounds() bool {
   516  	const failureAngle = thirty_six_degrees
   517  
   518  	return cp.state[0] < -2.4 ||
   519  		cp.state[0] > 2.4 ||
   520  		cp.state[2] < -failureAngle ||
   521  		cp.state[2] > failureAngle ||
   522  		cp.state[4] < -failureAngle ||
   523  		cp.state[4] > failureAngle
   524  }
   525  
   526  func (cp *CartPole)resetState() {
   527  	if cp.isMarkov {
   528  		// Clear all fitness records
   529  		cp.cartpos_sum = 0.0
   530  		cp.cartv_sum = 0.0
   531  		cp.polepos_sum = 0.0
   532  		cp.polev_sum = 0.0
   533  
   534  		cp.state[0], cp.state[1], cp.state[2], cp.state[3], cp.state[4], cp.state[5] = 0, 0, 0, 0, 0, 0
   535  	} else if !cp.generalizationTest {
   536  		// The long run non-markov test
   537  		cp.state[0], cp.state[1], cp.state[3], cp.state[4], cp.state[5] = 0, 0, 0, 0, 0
   538  		cp.state[2] = math.Pi / 180.0 // one_degree
   539  	}
   540  	cp.balanced_time_steps = 0 // Always count # of balanced time steps
   541  }
   542  
   543