600字范文,内容丰富有趣,生活中的好帮手!
600字范文 > python实现 模糊C均值聚类算法(Fuzzy-C-Means)-基于iris数据集

python实现 模糊C均值聚类算法(Fuzzy-C-Means)-基于iris数据集

时间:2023-04-16 02:15:07

相关推荐

Python 实现模糊 C 均值聚类算法(Fuzzy C-Means)——基于 iris 数据集

# Python 3 实现的模糊 C 均值聚类算法。数据集为 iris,请将数据文件 iris.txt 放在与代码相同的目录下。算法原理及步骤参考:/view/ee968c00eff9aef8941e06a2.html(原链接的域名在转载时已丢失)

import copy
import decimal
import math
import random
import sys
import time

try:
    # Plotting is optional: nothing in the clustering code below uses it, so
    # the script must still run on machines without matplotlib installed.
    import matplotlib.animation as animation
    import matplotlib.pyplot as plt
except ImportError:
    animation = None
    plt = None

# Upper bound of the random integers used to initialise the membership matrix U.
MAX = 10000.0
# Convergence threshold: iteration stops once no membership changes by more
# than this amount between two consecutive iterations.
Epsilon = 0.00000001

# Layout of the canonical UCI iris file: 3 classes of 50 consecutive samples.
_IRIS_CLASS_COUNT = 3
_IRIS_CLASS_SIZE = 50

# Species name -> numeric class id. Any other name maps to 2, which is what
# the original if/elif/else chain did for "Iris-virginica".
_IRIS_LABELS = {"Iris-setosa": 0, "Iris-versicolor": 1}


def import_data_format_iris(file):
    """Load an iris CSV file.

    Every comma-separated field except the last is parsed as a float feature;
    the last field is the species name. Blank lines are skipped.
    Data source: http://archive.ics.uci.edu/ml/machine-learning-databases/iris/

    Args:
        file: Path of the data file.

    Returns:
        A tuple ``(data, cluster_location)`` where ``data`` is a list of
        feature lists and ``cluster_location`` holds the class id of each
        sample (0 = Iris-setosa, 1 = Iris-versicolor, 2 = anything else).
    """
    data = []
    cluster_location = []
    with open(str(file), 'r') as f:
        for line in f:
            fields = [field.strip() for field in line.strip().split(",")]
            # The UCI file ends with an empty line; skip it instead of crashing.
            if fields == [""]:
                continue
            data.append([float(value) for value in fields[:-1]])
            # The line was stripped above, so the label carries no trailing
            # newline and must be compared without one.
            cluster_location.append(_IRIS_LABELS.get(fields[-1], 2))
    print("加载数据完毕")
    return data, cluster_location


def randomise_data(data):
    """Shuffle the samples and remember the permutation.

    Returns:
        A tuple ``(new_data, order)`` with ``new_data[i] == data[order[i]]``.
    """
    order = list(range(len(data)))
    random.shuffle(order)
    new_data = [data[original_index] for original_index in order]
    return new_data, order


def de_randomise_data(data, order):
    """Undo ``randomise_data``.

    Args:
        data: Rows in shuffled order.
        order: The ``order`` list returned by ``randomise_data``.

    Returns:
        The rows of ``data`` placed back at their original positions.
    """
    new_data = [[] for _ in data]
    for position, original_index in enumerate(order):
        new_data[original_index] = data[position]
    return new_data


def print_matrix(list):
    """Print a matrix one row per line.

    The parameter name shadows the builtin ``list``; it is kept unchanged so
    that existing keyword callers continue to work.
    """
    for row in list:
        print(row)


def initialise_U(data, cluster_number):
    """Build a random membership matrix whose rows each sum to 1.

    Each row holds ``cluster_number`` random integers drawn from
    ``[1, int(MAX)]`` and divided by their sum.

    Returns:
        A ``len(data)`` x ``cluster_number`` list of lists of floats.
    """
    U = []
    for _ in data:
        weights = [random.randint(1, int(MAX)) for _ in range(cluster_number)]
        total = sum(weights)
        U.append([weight / total for weight in weights])
    return U


def distance(point, center):
    """Return the Euclidean distance between two points given as lists.

    Returns -1 when the two points have different dimensions (behaviour kept
    from the original implementation).
    """
    if len(point) != len(center):
        return -1
    return math.sqrt(sum((p - c) ** 2 for p, c in zip(point, center)))


def end_conditon(U, U_old):
    """Return True when no membership changed by more than ``Epsilon``.

    The misspelled name is kept for backward compatibility; the correctly
    spelled alias ``end_condition`` is defined right below.
    """
    for new_row, old_row in zip(U, U_old):
        for new_value, old_value in zip(new_row, old_row):
            if abs(new_value - old_value) > Epsilon:
                return False
    return True


# Correctly spelled alias for new callers.
end_condition = end_conditon


def normalise_U(U):
    """Turn a fuzzy membership matrix into a hard assignment, in place.

    In every row the largest membership becomes 1 and all others become 0.
    When several entries tie for the maximum only the first one is kept, so
    each sample is assigned to exactly one cluster.

    Returns:
        The same (mutated) matrix ``U``.
    """
    for row in U:
        winner = row.index(max(row))
        for j in range(len(row)):
            row[j] = 1 if j == winner else 0
    return U


def _cluster_centers(data, U, m):
    """Return one centre per cluster: the mean of the samples weighted by U ** m."""
    dimensions = len(data[0])
    centers = []
    for j in range(len(U[0])):
        # The weights depend only on the cluster, so compute them once rather
        # than once per feature.
        weights = [row[j] ** m for row in U]
        total = sum(weights)
        centers.append([
            sum(weight * sample[i] for weight, sample in zip(weights, data)) / total
            for i in range(dimensions)
        ])
    return centers


def _updated_memberships(distances, m):
    """Return the membership matrix for a sample-to-centre distance matrix."""
    exponent = 2 / (m - 1)
    memberships = []
    for row in distances:
        coincident = sum(1 for d in row if d == 0)
        if coincident:
            # The sample sits exactly on one or more centres. The regular
            # formula would divide by zero, so share the membership equally
            # among the coincident centres and give the others none.
            share = 1.0 / coincident
            memberships.append([share if d == 0 else 0.0 for d in row])
        else:
            memberships.append([
                1 / sum((d_j / d_k) ** exponent for d_k in row)
                for d_j in row
            ])
    return memberships


# The recommended range for m is [1.5, 2.5].
def fuzzy(data, cluster_number, m, max_iter=None):
    """Run Fuzzy C-Means and return the hard (0/1) membership matrix.

    Args:
        data: Non-empty list of samples, each a list of floats of equal length.
        cluster_number: Number of clusters (at least 1).
        m: Fuzziness exponent; must be greater than 1.
        max_iter: Optional cap on the number of iterations. ``None`` (the
            default) iterates until ``end_conditon`` reports convergence.

    Returns:
        A ``len(data)`` x ``cluster_number`` matrix in which each row has a
        single 1 marking the cluster the sample belongs to.

    Raises:
        ValueError: If ``data`` is empty, ``cluster_number`` is below 1 or
            ``m`` is not greater than 1.
    """
    if not data:
        raise ValueError("data must contain at least one sample")
    if cluster_number < 1:
        raise ValueError("cluster_number must be at least 1")
    if m <= 1:
        # The membership update uses the exponent 2 / (m - 1).
        raise ValueError("m must be greater than 1")

    U = initialise_U(data, cluster_number)
    iteration = 0
    while max_iter is None or iteration < max_iter:
        iteration += 1
        # A fresh matrix is built every iteration, so the previous one can be
        # kept by reference for the convergence check.
        U_old = U
        centers = _cluster_centers(data, U, m)
        distances = [
            [distance(sample, center) for center in centers]
            for sample in data
        ]
        U = _updated_memberships(distances, m)
        if end_conditon(U, U_old):
            print("结束聚类")
            break
    print("标准化 U")
    return normalise_U(U)


def checker_iris(final_location, cluster_location=None):
    """Compare a hard clustering with the true iris classes.

    For every true class the most frequent cluster among its samples is
    counted as correct. NOTE: two classes may pick the same cluster, so this
    score overestimates the accuracy when clusters are merged.

    Args:
        final_location: Hard membership matrix in the original sample order.
        cluster_location: Optional true class id per sample. When omitted the
            canonical iris layout is assumed: 3 classes of 50 consecutive
            samples each.

    Returns:
        The string ``"准确度:<percentage>%"``.

    Raises:
        ValueError: If ``cluster_location`` is empty or its length differs
            from that of ``final_location``.
    """
    if cluster_location is None:
        labels = [
            i // _IRIS_CLASS_SIZE
            for i in range(_IRIS_CLASS_COUNT * _IRIS_CLASS_SIZE)
        ]
    else:
        labels = [label for label in cluster_location]
        if len(labels) != len(final_location):
            raise ValueError(
                "cluster_location and final_location must have the same length"
            )
    if not labels:
        raise ValueError("cluster_location must not be empty")

    # Size the counters by the number of clusters, not by the number of
    # classes, so that any cluster count can be scored.
    cluster_count = len(final_location[0])
    counts_by_class = {}
    for i, label in enumerate(labels):
        counts = counts_by_class.setdefault(label, [0] * cluster_count)
        for j in range(cluster_count):
            if final_location[i][j] == 1:
                counts[j] += 1

    right = 0.0
    for counts in counts_by_class.values():
        right += max(counts, default=0)
    print(right)
    answer = right / len(labels) * 100
    return "准确度:" + str(answer) + "%"


def main():
    """Cluster iris.txt with Fuzzy C-Means and print the accuracy and run time."""
    # data holds only the numeric features; cluster_location holds the true
    # class of every sample, in file order.
    data, cluster_location = import_data_format_iris("iris.txt")
    data, order = randomise_data(data)
    start = time.perf_counter()
    # Iris contains three species, so ask for three clusters.
    final_location = fuzzy(data, 3, 2)
    # Restore the file order so the rows line up with cluster_location.
    final_location = de_randomise_data(final_location, order)
    print(checker_iris(final_location, cluster_location))
    print("用时:{0}".format(time.perf_counter() - start))


if __name__ == '__main__':
    main()

本内容不代表本网观点和政治立场,如有侵犯你的权益请联系我们处理。
网友评论
网友评论仅供其表达个人看法,并不表明网站立场。