Abstract
由于鄙人一直跟导师做演化算法的研究,我主要是做遗传算法,但学艺未精,现在还没有任何学术成就,实在丢人。很多人都认为演化算法没有任何工程上面的作用,这岂不是羞辱我们做遗传算法的人吗?嗯,其实我也认为遗传算法一点作用都没有。哎,还是算了,趁这个春节假期做点小玩意,让遗传算法有点意思。本文利用遗传算法对神经网络做优化,而神经网络根据当前的位置判断飞机的移动方向(现在只可以左右移动)。
Relate work
首先,我们介绍一下遗传算法。其实遗传算法是模仿大自然生物繁衍的现象、规律进行迭代,计算出对应的适应度函数的最优解。遗传算法主要有以下几个算子组成:
- 选择算子:它选择一些相对比较好的个体(适应度比较高)作为候选个体来进行后续的杂交和变异操作。
- 杂交算子(交叉操作):模拟染色体的交叉现象,将两个个体的基因在某一点处进行交换。
- 变异算子(变异操作):将某一个变量值随机变成在该变量约束范围内的其他值。
听起来好像挺悬的,因为遗传算法其实没有什么理论上面的支撑。它仅仅是一个仿生算法,当然其他优化算法也缺乏理论上的支持,如:粒子群算法(PSO)、蚁群算法。实际上遗传算法相对于其他优化算法而言,很多时候都可以逃离局部最优点并到达全局最优点,但它却不像穷举法一样暴力求解。
Coding
Neuro Network
# -*- coding: utf-8 -*-
import random
import math
def sigmoid(z):
    """Logistic activation: map any real z into (0, 1).

    Uses the numerically stable two-branch form so that math.exp never
    receives a large positive argument; the naive 1 / (1 + exp(-z))
    raises OverflowError for z below roughly -709.

    :param z: pre-activation value (any float)
    :return: sigmoid(z) in the open interval (0, 1)
    """
    if z >= 0:
        return 1 / (1 + math.exp(-z))
    # for negative z, exp(z) lies in (0, 1], so it cannot overflow
    ez = math.exp(z)
    return ez / (1 + ez)
def random_clamped():
    """Return a uniformly distributed random float in (-1, 1)."""
    return 2.0 * random.random() - 1.0
class Neuron(object):
    """A single neuron: a bias value plus one weight per incoming connection."""

    def __init__(self):
        self.bias = 0      # bias term; starts at 0
        self.weights = []  # one weight per neuron of the previous layer

    def init_weight(self, n):
        """(Re)initialize *n* incoming weights uniformly in (-1, 1).

        :param n: number of neurons in the previous layer
        """
        self.weights = [random_clamped() for _ in range(n)]

    def __repr__(self):
        # enabled (was commented out) for easier debugging; also fixes
        # the "biase" typo the commented version carried
        return 'Neuron weight size: {}, bias value: {}'.format(len(self.weights), self.bias)
class Layer(object):
    """An ordered collection of neurons at one depth of the network."""

    def __init__(self, index):
        self.index = index  # position of this layer in the network (0 = input layer)
        self.neurons = []

    def init_neurons(self, n_output, n_input):
        """Create *n_output* neurons, each with *n_input* random weights.

        :param n_output: number of neurons in this layer
        :param n_input: number of neurons in the previous layer
        """
        self.neurons = []
        for _ in range(n_output):
            neuron = Neuron()
            neuron.init_weight(n_input)
            self.neurons.append(neuron)

    def __repr__(self):
        # enabled (was commented out): NeuroNetwork.print_info prints
        # layers, which is only useful with a meaningful repr
        return 'Layer ID: {}, Layer neuron size: {}'.format(self.index, len(self.neurons))
class NeuroNetwork(object):
    """A minimal fully-connected feed-forward network.

    The network can be flattened into a dict of layer sizes plus one flat
    list of weights (get_weight / set_weight), which lets the genetic
    algorithm treat the whole network as a single chromosome.
    """

    def __init__(self):
        self._layers = []

    def init_neuro_network(self, input, hiddens, output):
        """Build the layer structure with random weights.

        :param input: number of input neurons (no incoming weights)
        :param hiddens: iterable with the size of each hidden layer
        :param output: number of output neurons
        """
        index = 0
        previous_neurons = 0  # the input layer has no incoming connections
        layer = Layer(index)
        layer.init_neurons(input, previous_neurons)
        self._layers.append(layer)
        previous_neurons = input
        index += 1
        for size in hiddens:
            layer = Layer(index)
            layer.init_neurons(size, previous_neurons)
            self._layers.append(layer)
            previous_neurons = size
            index += 1
        layer = Layer(index)
        layer.init_neurons(output, previous_neurons)
        self._layers.append(layer)

    def get_weight(self):
        """Flatten the network into {'network': [layer sizes], 'weights': [flat weights]}."""
        data = {'network': [], 'weights': []}
        for layer in self._layers:
            data['network'].append(len(layer.neurons))
            for neuron in layer.neurons:
                data['weights'].extend(neuron.weights)
        return data

    def set_weight(self, data):
        """Rebuild the layers from a dict produced by get_weight().

        :param data: {'network': [layer sizes], 'weights': [flat weights]}
        """
        previous_neurons = 0
        index = 0
        pos = 0  # cursor into the flat weight list
        self._layers = []
        for num_output in data['network']:
            layer = Layer(index)
            layer.init_neurons(num_output, previous_neurons)
            for neuron in layer.neurons:
                for k in range(len(neuron.weights)):
                    neuron.weights[k] = data['weights'][pos]
                    pos += 1
            previous_neurons = num_output
            index += 1
            self._layers.append(layer)

    def feed_forward(self, inputs):
        """Propagate *inputs* through the network.

        Activations are stored on each neuron's ``bias`` attribute.
        (BUGFIX: the original wrote to a misspelled ``biase`` attribute,
        leaving the declared ``Neuron.bias`` unused; ``biase`` was only
        referenced inside this class, so the rename is self-contained.)

        :param inputs: sequence of input values, one per input neuron
        :return: list with the activation of each output neuron
        """
        # load the input values into the input layer
        for i, value in enumerate(inputs):
            self._layers[0].neurons[i].bias = value
        prev_layer = self._layers[0]
        for layer in self._layers[1:]:
            for neuron in layer.neurons:
                # weighted sum of the previous layer's activations
                total = 0
                for k, prev_neuron in enumerate(prev_layer.neurons):
                    total += prev_neuron.bias * neuron.weights[k]
                neuron.bias = sigmoid(total)
            prev_layer = layer
        # collect the activations of the output layer
        return [neuron.bias for neuron in self._layers[-1].neurons]

    def print_info(self):
        """Debug helper: print each layer's repr."""
        for layer in self._layers:
            print(layer)
首先我们需要定义一下神经网络的结构,该神经网络不是用tensorflow搭建的,因为在tensorflow中无法用到遗传算法对其进行优化。当然代码也是比较简陋的。
Genetic Algorithm
# -*- coding: utf-8 -*-
import random
from lesson9.neuro_network import random_clamped
class Genome(object):
    """One individual of the population: a fitness score plus the flat
    weight representation of its neural network."""

    def __init__(self, score, network_weights):
        # fitness of this individual (game score)
        self.score = score
        # dict in the format produced by NeuroNetwork.get_weight()
        self.network_weights = network_weights
class Generation(object):
    """One generation of the genetic algorithm: a score-sorted population
    plus the operators (selection, crossover, mutation) that derive the
    next generation.
    """

    def __init__(self, score_sort=-1, mutation_rate=0.05, mutation_range=2, elitism=0.2, population=50,
                 random_behaviour=0.1, n_child=1):
        self.genomes = []                          # kept sorted by score, see add_genome
        self._score_sort = score_sort              # < 0: descending (best score first)
        self._mutation_rate = mutation_rate        # per-gene probability of mutating
        self._mutation_range = mutation_range      # mutation offset drawn from [-range, range)
        self._elitism = elitism                    # fraction of best genomes kept unchanged
        self._population = population              # target population size
        self._random_behaviour = random_behaviour  # fraction of fully random genomes ("catastrophe")
        self._n_child = n_child                    # children per breeding pair

    def add_genome(self, genome):
        """Insert *genome* keeping the population sorted by score.

        With the default score_sort < 0 the highest score comes first.
        """
        for i in range(len(self.genomes)):
            if self._score_sort < 0:
                if genome.score > self.genomes[i].score:
                    self.genomes.insert(i, genome)
                    return
            else:
                if genome.score < self.genomes[i].score:
                    self.genomes.insert(i, genome)
                    return
        # BUGFIX: a genome worse than every existing one used to be inserted
        # at the second-to-last position (the loop index stopped at len-1);
        # it must go at the very end.
        self.genomes.append(genome)

    def breed(self, genome1, genome2, n_child):
        """Breed children between genome1 and genome2.

        Each child starts as a copy of genome1's weights, takes genome2's
        gene with fixed probability 0.7 (crossover), then each gene mutates
        with probability mutation_rate by a uniform offset in
        [-mutation_range, mutation_range).

        :param genome1: first parent (baseline for the child)
        :param genome2: second parent
        :param n_child: number of children to generate
        :return: list of new Genome objects with score 0
        """
        datas = []
        for _ in range(n_child):
            # copy the weight lists so the parents stay untouched
            data = Genome(0, {'network': genome1.network_weights['network'][:],
                              'weights': genome1.network_weights['weights'][:]})
            for i in range(len(genome2.network_weights['weights'])):
                # crossover
                if random.random() <= 0.7:
                    data.network_weights['weights'][i] = genome2.network_weights['weights'][i]
            for i in range(len(data.network_weights['weights'])):
                # mutation. BUGFIX: the original multiplied by mutation_rate
                # instead of mutation_range, so every mutation was a negative
                # offset in [-range, -range + 2*rate) rather than symmetric.
                if random.random() <= self._mutation_rate:
                    data.network_weights['weights'][i] += (
                        random.random() * self._mutation_range * 2 - self._mutation_range)
            datas.append(data)
        return datas

    def generate_next_generation(self):
        """Build the list of weight dicts for the next generation.

        Elites are carried over unchanged, a slice of fully random genomes
        preserves diversity, and the remainder is bred from the best genomes
        (implicit selection: only the top max_n individuals breed).
        """
        nexts = []
        # elitism: keep the best genomes' weights as-is
        for i in range(round(self._elitism * self._population)):
            if len(nexts) < self._population:
                nexts.append(self.genomes[i].network_weights)
        # random behaviour ("catastrophe"): inject completely random genomes.
        # BUGFIX: the original randomized genomes[0].network_weights in place,
        # corrupting the elite just appended above and appending the same
        # aliased dict repeatedly; build a fresh dict each time instead.
        for i in range(round(self._random_behaviour * self._population)):
            template = self.genomes[0].network_weights
            n = {'network': template['network'][:],
                 'weights': [random_clamped() for _ in template['weights']]}
            if len(nexts) < self._population:
                nexts.append(n)
        max_n = 0
        while True:
            for i in range(max_n):
                childs = self.breed(self.genomes[i], self.genomes[max_n],
                                    self._n_child if self._n_child > 0 else 1)
                for c in range(len(childs)):
                    nexts.append(childs[c].network_weights)
                    if len(nexts) >= self._population:
                        return nexts
            max_n += 1
            if max_n >= len(self.genomes) - 1:
                max_n = 0
- add_genome:添加个体到种群当中,用它作为下一轮迭代的选择使用的候选个体
- breed函数:中包含了杂交,变异两个主要的算子操作,n_child用于确定在个体繁衍当中可以产生多个后代。
- generate_next_generation:在本轮迭代结束时,我们需要重新生成下轮的个体,而这些个体是根据上一轮迭代产生的个体而生成的。其实该函数也是整个遗传算法的大框架。值得注意的是里面有“精英主义”,“精英主义”就是选择最优的几个解直接作为下次迭代的个体,也就是保留它。这样可以加快遗传算法的收敛速度,但也容易让它陷入局部最优当中。这里有个random_behaviour用于重新置换一些新的变量到种群中,增加种群多样性,可以看作是一种灾变。接下来就循环进行交叉和变异,其实这里隐藏了选择操作。大家可以留意一下
max_n
这个变量是取前几个个体进行交叉和变异,而这前几个元素是种群当中个体适应度较高的前几个。(个人认为这里的选择操作还是缺乏一定的随机性)
Neuro Evolution
# -*- coding: utf-8 -*-
from lesson9.neuro_network import NeuroNetwork
from lesson9.GA import Generation, Genome
class Generations(object):
    """History of Generation objects plus helpers to create populations."""

    def __init__(self, population=50, network_size=None):
        self.generations = []
        self._population = population
        # default topology: 4 inputs, one hidden layer of 16 neurons, 1 output
        self._network_size = [4, [16], 1] if network_size is None else network_size

    def first_generation(self):
        """Create the initial population of randomly-weighted networks.

        :return: list of weight dicts, one per individual
        """
        out = []
        for _ in range(self._population):
            net = NeuroNetwork()
            net.init_neuro_network(self._network_size[0], self._network_size[1],
                                   self._network_size[2])
            out.append(net.get_weight())
        self.generations.append(Generation())
        return out

    def next_generation(self):
        """Derive the next population from the latest generation.

        :return: list of weight dicts, or False if no generation exists yet
        """
        if not self.generations:
            return False
        weights = self.generations[-1].generate_next_generation()
        self.generations.append(Generation())
        return weights

    def add_genome(self, genome):
        """Record *genome* in the latest generation; False if none exists."""
        if not self.generations:
            return False
        return self.generations[-1].add_genome(genome)
class NeuroEvolution(object):
    """Facade tying the neural networks to the genetic algorithm."""

    def __init__(self):
        self.generations = Generations()

    def restart(self):
        """Discard all history and start from scratch."""
        self.generations = Generations()

    def next_generation(self, low_historic=False, historic=0):
        """Produce the NeuroNetwork objects of the next population.

        :param low_historic: if True, drop the weight data of the
            generation before last to save memory
        :param historic: number of past generations to keep (-1 keeps all)
        :return: list of NeuroNetwork instances
        """
        if len(self.generations.generations) == 0:
            networks = self.generations.first_generation()
        else:
            networks = self.generations.next_generation()
        nn = []
        for weights in networks:
            network = NeuroNetwork()
            network.set_weight(weights)
            nn.append(network)
        if low_historic:
            if len(self.generations.generations) >= 2:
                genomes = self.generations.generations[-2].genomes
                # BUGFIX: the original did `for i in range(genomes)` with a
                # list argument, which raised TypeError whenever this branch
                # ran; iterate the genomes directly.
                for genome in genomes:
                    # NOTE(review): the original assigned to a nonexistent
                    # `network` attribute (a no-op for memory); dropping
                    # `network_weights` is presumably the intended saving —
                    # confirm no later reader relies on it.
                    genome.network_weights = None
        if historic != -1:
            if len(self.generations.generations) > historic + 1:
                del self.generations.generations[0:len(self.generations.generations) - (historic + 1)]
        return nn

    def network_score(self, score, network):
        """Record *network*'s fitness *score* in the current generation."""
        self.generations.add_genome(Genome(score, network.get_weight()))
这里是把神经网络和遗传算法合并在一起的代码。在神经网络当中,我们有很多权值组成(weights),而在不同的优化算法中我们都是控制这些权值来使得神经网络在某个损失函数下达到全局最优,从而获得全局最优解。同样的,在遗传算法当中,我们把这些权值作为我们的基因或者染色体。而每个个体都有着自己的整个神经网络里面的所有权值,我们根据这些权值控制我们的飞机的移动方向,避免与炸弹碰到,同时我们对每个个体(神经网络)进行计分。从中知道哪个个体的权值是比较好的,哪个排在种群前面。这样我们就可以进行选择,杂交和变异的操作。
game
# -*- coding: utf-8 -*-
import pygame
import sys
from pygame.locals import *
import random
import math
from lesson9 import neuro_evolution
# window background colour (white, RGB)
BACKGROUND = (255, 255, 255)
# window size in pixels: (width, height)
SCREEN_SIZE = (320, 480)
class Plane(object):
    """The AI-controlled aircraft sprite."""

    def __init__(self, plane_image):
        self.plane_image = plane_image
        self.rect = plane_image.get_rect()
        self.width = self.rect[2]
        self.height = self.rect[3]
        # start horizontally centred at the bottom of the screen
        self.x = SCREEN_SIZE[0] / 2 - self.width / 2
        self.y = SCREEN_SIZE[1] - self.height
        self.move_x = 0  # -1 = left, 0 = stay, 1 = right
        self.speed = 2
        self.alive = True

    def update(self):
        """Advance one frame: apply the current horizontal movement."""
        self.x += self.move_x * self.speed

    def draw(self, screen):
        """Blit the plane at its current position."""
        screen.blit(self.plane_image, (self.x, self.y, self.width, self.height))

    def is_dead(self, enemes):
        """Return True when the plane left the screen or hit an enemy."""
        out_left = self.x < -self.width
        # NOTE(review): this simplifies to self.x > SCREEN_SIZE[0], i.e. the
        # plane must fully leave the right edge before dying — confirm intended.
        out_right = self.x + self.width > SCREEN_SIZE[0] + self.width
        if out_left or out_right:
            return True
        return any(self.collision(e) for e in enemes)

    def collision(self, eneme):
        """Axis-aligned bounding-box overlap test against one enemy."""
        separated = (self.x > eneme.x + eneme.width
                     or self.x + self.width < eneme.x
                     or self.y > eneme.y + eneme.height
                     or self.y + self.height < eneme.y)
        return not separated

    def get_inputs_values(self, enemes, input_size=4):
        """Build the neural-network input vector.

        Slot 0 is the plane's x ratio; each enemy contributes its x and y
        ratios; the next slot is -1.0 when the plane is left of the first
        enemy, else 1.0.
        """
        inputs = [0.0] * input_size
        inputs[0] = self.x * 1.0 / SCREEN_SIZE[0]
        index = 1
        for eneme in enemes:
            inputs[index] = eneme.x * 1.0 / SCREEN_SIZE[0]
            index += 1
            inputs[index] = eneme.y * 1.0 / SCREEN_SIZE[1]
            index += 1
        if len(enemes) > 0 and self.x < enemes[0].x:
            inputs[index] = -1.0
            index += 1
        else:
            inputs[index] = 1.0
        return inputs
class Enemy(object):
def __init__(self, enemy_image):
self.enemy_image = enemy_image
self.rect = enemy_image.get_rect()
self.width = self.rect[2]
self.height = self.rect[3]
self.x = random.choice(range(0, int(SCREEN_SIZE[0] - self.width / 2), 71))
self.y = 0
def update(self):
self.y += 6
def draw(self, screen):
screen.blit(self.enemy_image, (self.x, self.y, self.width, self.height))
def is_out(self):
return True if self.y >= SCREEN_SIZE[1] else False
class Game(object):
    """Pygame shell: runs the simulation loop and feeds game state to the AI.

    Each frame every living plane asks its neural network for a move,
    collisions are resolved, and a plane that dies reports the current
    frame count (self.score) as its fitness. When all planes are dead,
    the next GA generation is started.
    """

    def __init__(self):
        pygame.init()
        self.screen = pygame.display.set_mode(SCREEN_SIZE)
        self.clock = pygame.time.Clock()
        pygame.display.set_caption("Plane")
        self.ai = neuro_evolution.NeuroEvolution()
        self.generation = 0
        self.max_enemes = 1  # at most one missile on screen at a time
        self.plane_image = pygame.image.load("./imgs/plane.jpg").convert_alpha()
        self.eneme_image = pygame.image.load("./imgs/missile.jpg").convert_alpha()

    def start(self):
        """Reset score/sprites and spawn one plane per genome of the new generation."""
        self.score = 0
        self.planes = []
        self.enemes = []
        self.gen = self.ai.next_generation()
        for i in range(len(self.gen)):
            plane = Plane(self.plane_image)
            self.planes.append(plane)
        self.generation += 1
        self.alives = len(self.planes)

    def update(self, screen):
        """Advance the game by one frame and draw everything.

        NOTE(review): self.score is a shared frame counter, so every plane's
        fitness is simply the frame at which it died; also start() is called
        from inside the plane loop when the last plane dies, after which the
        loop keeps indexing into the freshly created planes list — confirm
        this restart-mid-loop behaviour is intended.
        """
        for i in range(len(self.planes)):
            if self.planes[i].alive:
                # ask plane i's network for a move based on the current state
                inputs = self.planes[i].get_inputs_values(self.enemes)
                res = self.gen[i].feed_forward(inputs)
                # dead zone between 0.5 and 0.55 keeps the plane still
                if res[0] < 0.5:
                    self.planes[i].move_x = -1
                elif res[0] > 0.55:
                    self.planes[i].move_x = 1
                self.planes[i].update()
                self.planes[i].draw(screen)
                if self.planes[i].is_dead(self.enemes) == True:
                    self.planes[i].alive = False
                    self.alives -= 1
                    # fitness = number of frames survived so far
                    self.ai.network_score(self.score, self.gen[i])
                    if self.is_ai_all_dead():
                        self.start()
        self.gen_enemes()
        for i in range(len(self.enemes)):
            self.enemes[i].update()
            self.enemes[i].draw(screen)
            if self.enemes[i].is_out():
                # at most one enemy exists, so delete by index and break to
                # avoid iterating a mutated list
                del self.enemes[i]
                break
        self.score += 1
        print("alive: {}, generation: {}, score {}".format(self.alives, self.generation, self.score))

    def run(self, FPS=100):
        """Main loop: handle quit events, update, draw, cap the frame rate."""
        while True:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    sys.exit()
            self.screen.fill(BACKGROUND)
            self.update(self.screen)
            pygame.display.update()
            self.clock.tick(FPS)

    def gen_enemes(self):
        """Spawn a new enemy while fewer than max_enemes are on screen."""
        if len(self.enemes) < self.max_enemes:
            enemy = Enemy(self.eneme_image)
            self.enemes.append(enemy)

    def is_ai_all_dead(self):
        """Return True when no plane is alive any more."""
        for plane in self.planes:
            if plane.alive:
                return False
        return True
if __name__ == '__main__':
    # only launch the game when executed as a script, not on import
    game = Game()
    game.start()
    game.run()
这就是游戏的开始。我们在开始游戏时,随机生成一些个体作为起始变量值。在游戏进行时,很多飞机(个体)都会给炸弹炸掉,那么就会有的飞机早点炸掉,有些飞机晚点炸掉,自然而然就会有不同高低的分值。而这些分值正好就是各个个体(神经网络)适应度函数值。这其实也符合了遗传算法里面的选择特点“适者生存,物竞天择”。当所有的飞机都死光光的时候,我们就可以根据这些个体得到的分值产生下一代群体(也就是update函数中判断is_ai_all_dead,然后再重新产生个体)。
Result Sample
在一开始的时候:
init
几乎是自杀的人肉炸弹,然后慢慢它学乖了....
ok
Conclusion
但由于我这代码比较简陋,其实遗传算法的效果还是不够的。因为它没有学习到怎么真正躲开炸弹。
其中我觉得有几个原因
- 可能是因为特征不够,我这里的输入特征就四个:
- 飞机在横向屏幕的距离比(保证飞机不要飞出去),
- 炸弹在横向屏幕的距离比
- 炸弹在纵向屏幕的距离比
- 炸弹在飞机的左边还是右边
- 神经网络比较简陋,我这个属于全连接的神经网络,鲁棒性比较逊色。其实我们可以利用强化学习去完成这个任务,例如DQL(Deep Q Learning)。然后再用遗传算法去优化,或许会有不错的效果。
Thanks
感谢各位观看鄙人在忙碌的家务中写出的杂文。今天是年三十(除夕),明天就是2018年的春节,新的一年即将开始了。同时也说明我过去一年又被我糟蹋浪费了。。。还是一事无成的我。
希望新的一年能够发出paper,然后在kaggle的比赛取得比较好的成绩吧。
祝大家新的一年,事事顺利,无论啥事都会过关斩将,关关难过关关过
准备去可怕的花市逛街咯。。。
网友评论