For the past few months I have been trying to understand genetic algorithms (GA), and most of the material available on the web was not easy for me. Then I came across Genetic Algorithm Implementation in Python, an article written by Ahmed Gad, which implements a GA with numpy. Using it as a guide, I wrote my first GA in Python with numpy. All of the code was written by me except cal_pop_fitness.
The problem the GA needs to solve is to find the parameters (a, b) in an equation of the form y = a*x1 + b*x2, where x1, x2 and y are given as numpy arrays. The equation I chose to solve is y = 2*x1 + 3*x2. Because there are two parameters to find, I chose two genes per chromosome. Every GA needs a fitness function, and I chose the mean squared error (MSE) between the predicted and real outputs for selecting the best parents. MSE works here because we already have the real output y: the lower the MSE, the better the parents we select. This is also the main difference between my GA and Ahmed's; he used a maximisation fitness function, whereas I used a minimisation one. From the selected parents we generate offspring by crossover and mutation. All offspring go through crossover, but only a few of them are mutated.
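To make the fitness function concrete, here is a minimal sketch (separate from the full script below) that scores a single candidate chromosome (a, b) against a few data points; the helper name mse_fitness is my own and is not part of the script.

import numpy as np

# Tiny illustration of the MSE fitness: the target chromosome (a, b) = (2, 3)
# scores exactly zero, and worse guesses score higher, so lower is better.
x = np.array([[1, 2], [2, 4], [3, 6]])   # columns are x1 and x2
y = 2*x[:, 0] + 3*x[:, 1]                # real outputs from y = 2*x1 + 3*x2

def mse_fitness(chromosome, x, y):
    pred = np.sum(chromosome*x, axis=1)  # a*x1 + b*x2 for every sample
    return np.square(y - pred).mean()

print(mse_fitness(np.array([2.0, 3.0]), x, y))  # 0.0, the perfect chromosome
print(mse_fitness(np.array([1.0, 1.0]), x, y))  # much larger error for a poor guess

The complete script is below.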
import numpy as np
np.set_printoptions(formatter={'float': '{: 0.3f}'.format})
np.random.seed(1)
def generate_data(x_range):
    # Formula: y = 2*x1 + 3*x2
    x_range = range(-x_range, x_range)
    x = np.vstack((np.array(x_range), np.array(x_range)*2)).T
    y = [2*i[0] + 3*i[1] for i in x]
    return x, np.array(y)
def cal_pop_fitness(equation_inputs, pop):
    # Predicted y for a chromosome: a*x1 + b*x2 for every sample
    fitness = np.sum(pop*equation_inputs, axis=1)
    return fitness
def select_best_parents(total_population, top_parents):
    arr = []
    for i in total_population:
        pred_y = cal_pop_fitness(i, X)
        mse = (np.square(y - pred_y)).mean(axis=None)  # Mean squared error
        # Append the mse to the chromosome values
        row = np.append(i, mse).tolist()
        arr.append(row)
    arr = np.array(arr)
    # Sort the chromosomes with respect to mse
    # The lower the mse, the better the individual
    arr = arr[arr[:, 2].argsort()]
    error = arr[:, 2]
    arr = arr[:, 0:2]  # remove the mse column
    return arr[0:top_parents, :], error[0:top_parents]
def crossover(sub_population):
    children = []
    for i in range(population_size):
        # Select two random parents
        parent1 = np.random.randint(0, sub_population.shape[0])
        parent2 = np.random.randint(0, sub_population.shape[0])
        # A child is created from parent1's first gene and parent2's second gene
        child = [sub_population[parent1][0], sub_population[parent2][1]]
        children.append(child)
    return np.array(children)
def mutation(population):
    for i, row in enumerate(population):
        if np.random.rand() < mutation_rate:
            # Random mutation: replace one gene with a new random value
            population[i][np.random.randint(genes)] = np.random.uniform(low=-4.0, high=4.0)
    return population
if __name__ == "__main__":
# Generate input ouptut data
X,y = generate_data(150)
genes = X.shape[1] # Total genes in a chromosome
population_size = 50 # Number of populations
top_parents = int(0.25*population_size) # Select top parents from the total populations for mating
mutation_rate = 0.1 # Mutation rate
generations = 1000 # number of generations
# Step1 : Population
# Total population
total_population = np.random.uniform(low = -4.0, high = 4.0, size = (population_size,genes))
for i in range(generations):
# Step2 : Fitness calculation
# Step3 : Mating pool
# Step4 : Parents selection
# Choosing best parents with their corresponding mse
sub_population,mse = select_best_parents(total_population,top_parents)
# Step5 : Mating
# Crossover
new_population = crossover(sub_population)
# Mutation
new_population = mutation(new_population)
print("Best Parameters: ",np.round(sub_population[0],3),"Best MSE: ", round(mse[0],3))
# Next population
total_population = new_population
    # Real formula
    x_range = range(-10, 10)
    x1 = np.array(x_range)
    x2 = np.array(x_range)*2
    formula = '2*x1+3*x2'
    y1 = eval(formula)
    # Formula predicted by the genetic algorithm
    a = 1.943
    b = 3.029
    formula = 'a*x1+b*x2'
    y2 = eval(formula)
    print("\nMSE between real and predicted formulas: ", (np.square(y1-y2)).mean(axis=None))