python遗传算法应用（句子配对和TSP问题）

2019年3月19日 236次阅读来源: 遗传算法

在不了解遗传算法的基本python代码之前，请参考博客：http://blog.csdn.net/winycg/article/details/78917528

句子配对应用

针对目标语句：'Hello,World!'

种群产生：以ASCII码在[32, 126]的范围为种群基因产生种群

适应度计算：逐位比较种群DNA和目标DNA，基因相同记为1，不相同记为0，各位之和即为该个体的适应度

变异：对于要变异的位置，产生一个[32,126]范围的数与之替换

import numpy as np
import random

# Sentence the population should evolve into.
target_phrase = 'Hello,World!'
# Number of individuals kept in every generation.
population_size = 300
# Crossover probability (per individual, per generation).
pc = 0.6
# Mutation probability (per gene, per generation).
pm = 0.01
# Number of generations to run.
n_generations = 1000
# One gene per character of the target sentence.
DNA_length = len(target_phrase)
# ASCII codes of the target sentence. np.fromstring() on text is the
# deprecated "binary mode" and is rejected by recent NumPy releases, so the
# string is encoded explicitly and read with np.frombuffer() instead.
target_ASCII = np.frombuffer(target_phrase.encode('ascii'), dtype=np.int8)
# Inclusive range of ASCII codes a gene may take (printable characters).
ASCII_bound = [32, 126]


class GA(object):
    """Genetic algorithm that evolves ASCII strings towards a target phrase.

    The class reads its configuration from the module-level names
    ``population_size``, ``DNA_length``, ``pc``, ``pm``, ``ASCII_bound`` and
    ``target_ASCII``.  ``self.populations`` is an integer array of shape
    ``(population_size, DNA_length)`` holding one individual per row.
    """

    def __init__(self):
        # np.random.randint excludes its upper bound, so 1 is added to make
        # ASCII_bound[1] itself reachable, consistent with mutation().
        self.populations = np.random.randint(ASCII_bound[0],
                                             ASCII_bound[1] + 1,
                                             (population_size, DNA_length))

    def DNA_decode(self, DNA):
        """Translate a sequence of ASCII codes into the string it spells."""
        return ''.join(chr(item) for item in DNA)

    def calculate_fitness(self):
        """Return, per individual, the number of genes equal to the target."""
        return np.sum((self.populations == target_ASCII), axis=1)

    def selection(self):
        """Resample the population with probability proportional to fitness."""
        fitness = self.calculate_fitness()
        total_fitness = np.sum(fitness)
        # When no individual matches any target gene the proportional weights
        # would be 0/0 (NaN); fall back to a uniform draw in that case.
        probabilities = fitness / total_fitness if total_fitness > 0 else None
        population_id = np.random.choice(np.arange(population_size), (population_size,),
                                         p=probabilities)
        # Fancy indexing copies the chosen rows into a fresh array.
        self.populations = self.populations[population_id]

    def crossover(self):
        """Mix each individual, with probability ``pc``, with a random mate."""
        if population_size < 2:
            # No distinct mate exists; nothing to cross over.
            return
        population_copy = self.populations.copy()
        for i in range(population_size):
            if random.random() < pc:
                # Pick a mate from the WHOLE population, re-drawing until it
                # differs from individual i.
                crossover_object = i
                while crossover_object == i:
                    crossover_object = random.randint(0, population_size - 1)
                # True  -> keep the gene of individual i,
                # False -> take the gene of the mate.
                keep_own = np.random.randint(0, 2, DNA_length).astype(bool)
                self.populations[i] = np.where(keep_own,
                                               population_copy[i],
                                               population_copy[crossover_object])

    def mutation(self):
        """Replace each gene, with probability ``pm``, by a random ASCII code."""
        for i in range(population_size):
            for point in range(DNA_length):
                if random.random() < pm:
                    # random.randint includes both bounds.
                    self.populations[i][point] = random.randint(ASCII_bound[0], ASCII_bound[1])

    def evolve(self):
        """Advance one generation: selection, then crossover, then mutation."""
        self.selection()
        self.crossover()
        self.mutation()


ga = GA()
for step in range(n_generations):
    # Report the fittest individual of the current generation, then evolve.
    fitness_scores = ga.calculate_fitness()
    best_index = np.argmax(fitness_scores)
    best_DNA = ga.populations[best_index]
    print('the best DNA: ', ga.DNA_decode(best_DNA))
    ga.evolve()

TSP问题

参考链接：https://morvanzhou.github.io/tutorials/machine-learning/evolutionary-algorithm/2-03-genetic-algorithm-travel-sales-problem/

针对n个城市，找出经过这n个城市仅一次的最短路径

种群构成：0~n-1的随机排列组合

比如要经过3个城市, 则有如下的组合方式：
0-1-2
0-2-1
1-0-2
1-2-0
2-0-1
2-1-0

适应度计算：由于需要找最短路径，可以让适应度与距离成反比，这样就可以找适应度最大的个体

我们要选择合适的适应度函数，使得适应度值差值能放大距离差值，这样使得选择（selection）的时候，适应度大的个体更容易被选择。不进行放大的话，由于距离值差值较小，使得适应度差值较小，选点的时候差异会不明显。

交叉：设父亲=[0,1,2,3]，母亲=[2,1,3,0]

产生个体过程如下：

首先先从父亲DNA中随机选择某些点放入后代DNA,假设选择[1,3]：后代DNA=[1,3,_,_]

后代还缺少城市0和2，这两个城市按照它们在母亲DNA中出现的相对顺序（母亲中2在0之前）依次补入：后代DNA=[1,3,2,0]

变异：对于一个DNA，随机交换DNA中的两个位置

import numpy as np
import random
import matplotlib.pyplot as plt


# Number of individuals (candidate routes) kept in every generation.
population_size = 300
# Crossover probability, tested once per pair of neighbouring individuals.
pc = 0.4
# Mutation probability, tested once per gene.
pm = 0.02
# Number of generations to run.
n_generations = 500
# Number of cities; every DNA is a permutation of range(DNA_length).
DNA_length = 20
# Random (x, y) coordinates of the cities, each drawn from [0, 1).
city_coordinate = np.random.rand(DNA_length, 2)
# Figure created up front; plot_route() draws through the pyplot interface.
fig = plt.figure()


class GA(object):
    """Genetic algorithm for the travelling-salesman problem (open path).

    The class reads its configuration from the module-level names
    ``population_size``, ``DNA_length``, ``pc``, ``pm`` and
    ``city_coordinate``.  ``self.populations`` has shape
    ``(population_size, DNA_length)``; every row is a permutation of the city
    indices and describes the order in which the cities are visited.
    """

    def __init__(self):
        self.populations = np.vstack([np.random.permutation(DNA_length)
                                      for _ in range(population_size)])

    def DNA_decode(self, DNA_id):
        """Return the x and y coordinates of route ``DNA_id`` in visiting order.

        Returns:
            Tuple ``(coordinate_x, coordinate_y)`` of float arrays, one entry
            per visited city.
        """
        DNA = self.populations[DNA_id]
        # Fancy indexing gathers the coordinates of all cities at once.
        # Builtin float replaces the np.float alias removed in NumPy 1.24.
        coordinate_x = np.asarray(city_coordinate[DNA, 0], dtype=float)
        coordinate_y = np.asarray(city_coordinate[DNA, 1], dtype=float)
        return coordinate_x, coordinate_y

    def calculate_fitness(self):
        """Return ``(fitness, distance)`` arrays with one entry per individual.

        ``distance`` is the length of the path through the cities in DNA
        order (the path is not closed back to the first city).  ``fitness``
        is ``exp(20 / distance)``, which amplifies small distance differences
        so that selection favours shorter routes more strongly.
        """
        # Shape (population_size, DNA_length, 2): coordinates in route order.
        routes = np.asarray(city_coordinate[self.populations], dtype=float)
        steps = np.diff(routes, axis=1)
        # Euclidean length of every segment is sqrt(dx^2 + dy^2); summing the
        # squares without the root would not be the travelled distance.
        distance = np.sum(np.hypot(steps[..., 0], steps[..., 1]), axis=1)
        fitness = np.exp(20 / distance)
        return fitness, distance

    def selection(self):
        """Resample the population with probability proportional to fitness."""
        fitness, _ = self.calculate_fitness()
        population_id = np.random.choice(np.arange(population_size), (population_size,),
                                         p=fitness / np.sum(fitness))
        # Fancy indexing copies the chosen rows into a fresh array.
        self.populations = self.populations[population_id]

    def crossover(self):
        """Cross neighbouring pairs (i, i+1) with probability ``pc``.

        A random subset of the father's cities is kept in father order; the
        missing cities are appended in the order they appear in the mother,
        so every child is still a valid permutation.  The second child is
        built the same way from the father's remaining cities.
        """
        for i in range(0, population_size - 1, 2):
            if random.random() < pc:
                # Builtin bool replaces the np.bool alias removed in NumPy 1.24.
                father_points = np.random.randint(0, 2, DNA_length).astype(bool)
                father = self.populations[i]
                mother = self.populations[i + 1]
                father_city1 = father[father_points]
                father_city2 = father[np.invert(father_points)]
                # np.isin(a, b) tests, element by element, whether each entry
                # of a occurs in b; invert=True negates the result, so this
                # marks the mother's cities that are NOT in father_city1.
                mother_points = np.isin(mother, father_city1, invert=True)
                mother_city1 = mother[mother_points]
                mother_city2 = mother[np.invert(mother_points)]
                # All pieces above are copies, so overwriting rows is safe.
                self.populations[i] = np.hstack((father_city1, mother_city1))
                self.populations[i + 1] = np.hstack((father_city2, mother_city2))

    def mutation(self):
        """Swap each gene, with probability ``pm``, with a random position."""
        for i in range(population_size):
            route = self.populations[i]  # view: writes go to the population
            for point in range(DNA_length):
                if random.random() < pm:
                    swap_point = random.randint(0, DNA_length - 1)
                    route[point], route[swap_point] = route[swap_point], route[point]

    def evolve(self):
        """Advance one generation: selection, then crossover, then mutation."""
        self.selection()
        self.crossover()
        self.mutation()


def plot_route(dist_min, DNA_coordinate):
    """Redraw the current figure with all cities and one route.

    Args:
        dist_min: Route length shown in the text label.
        DNA_coordinate: Pair ``(xs, ys)`` with the route's coordinates in
            visiting order, as returned by ``GA.DNA_decode``.
    """
    # Start from an empty figure on every call.
    plt.clf()
    city_x = city_coordinate[:, 0]
    city_y = city_coordinate[:, 1]
    plt.scatter(city_x, city_y, s=100, c='k')
    route_x = DNA_coordinate[0]
    route_y = DNA_coordinate[1]
    plt.plot(route_x, route_y, color='red')
    # Fixed axis limits with a small margin around the unit square.
    axis_limits = (-0.1, 1.1)
    plt.xlim(axis_limits)
    plt.ylim(axis_limits)
    label = 'total distance=%.3f' % dist_min
    plt.text(-0.05, -0.05, label)
    # A short pause lets the GUI event loop refresh the window.
    plt.pause(0.01)


ga = GA()
for step in range(n_generations):
    # Report and plot the fittest route of this generation, then evolve.
    ga_fitness, ga_dist = ga.calculate_fitness()
    best_DNA_id = np.argmax(ga_fitness)
    best_dist = ga_dist[best_DNA_id]
    mean_dist = np.mean(ga_dist)
    print('the best DNA: %.3f, mean distance: %.3f' % (best_dist, mean_dist))
    best_route = ga.DNA_decode(best_DNA_id)
    plot_route(best_dist, best_route)
    ga.evolve()

《python遗传算法应用（句子配对和TSP问题）》

    原文作者：遗传算法
    原文地址: https://blog.csdn.net/winycg/article/details/78998350
    本文转自网络文章，转载此文章仅为分享知识，如有侵权，请联系博主进行删除。