# SSA-SVM: sparrow search algorithm (SSA) used to tune C and gamma of an RBF-kernel SVM

import numpy as np
import random
from matplotlib import pyplot as plt
import scipy.io as scio
from sklearn import svm
from sklearn import preprocessing
from sklearn.metrics import accuracy_score


'''优化函数'''
#定义适应函数,以测试集的错误率为适应度值
def fun(X):
    #3.训练svm分类器
    classifier=svm.SVC(C=X[0],kernel='rbf',gamma=X[1]) # ovr:一对多策略
    classifier.fit(train_wine,train_label.ravel()) #ravel函数在降维时默认是行序优先
    #4.计算svc分类器的准确率
    #tra_label=classifier.predict(train_wine) #训练集的预测标签
    tes_label=classifier.predict(test_wine) #测试集的预测标签
    output = 1 - accuracy_score(test_label,tes_label)#计算错误率,如果错误率越小,结果越优 
    return output


''' 种群初始化函数 '''
def initial(pop, dim, ub, lb):
    X = np.zeros([pop, dim])
    for i in range(pop):
        for j in range(dim):
            X[i, j] = random.random()*(ub[j] - lb[j]) + lb[j]
    print(type(X))
    return X,lb,ub
            
'''边界检查函数'''
def BorderCheck(X,ub,lb,pop,dim):
    for i in range(pop):
        for j in range(dim):
            if X[i,j]>ub[j]:
                X[i,j] = ub[j]
            elif X[i,j]<lb[j]:
                X[i,j] = lb[j]
    return X
    
    
'''计算适应度函数'''
def CaculateFitness(X,fun):
    pop = X.shape[0]
    fitness = np.zeros([pop, 1])
    for i in range(pop):
        fitness[i] = fun(X[i, :])
    return fitness

'''适应度排序'''
def SortFitness(Fit):
    fitness = np.sort(Fit, axis=0)
    index = np.argsort(Fit, axis=0)
    return fitness,index


'''根据适应度对位置进行排序'''
def SortPosition(X,index):
    Xnew = np.zeros(X.shape)
    for i in range(X.shape[0]):
        Xnew[i,:] = X[index[i],:]
    return Xnew

'''麻雀发现者更新'''
def PDUpdate(X,PDNumber,ST,Max_iter):
    X_new = np.zeros(X.shape)
    R2 = random.random()
    for j in range(PDNumber):
        if R2<ST:
            X_new[j,:] = X[j,:]*np.exp(-j/(random.random()*Max_iter))
        else:
            X_new[j,:] = X[j,:] + np.random.randn()*np.ones([1,dim])
    return X_new
        
'''麻雀加入者更新'''            
def JDUpdate(X,PDNumber,pop,dim):
    X_new = np.zeros(X.shape)
    for j in range(PDNumber+1,pop):
         if j>(pop - PDNumber)/2 + PDNumber:
             X_new[j,:]= np.random.randn()*np.exp((X[-1,:] - X[j,:])/j**2)
         else:
             #产生-1,1的随机数
             A = np.ones([dim,1])
             for a in range(dim):
                 if(random.random()>0.5):
                     A[a]=-1       
         AA = np.dot(A,np.linalg.inv(np.dot(A.T,A)))
         X_new[j,:]= X[1,:] + np.abs(X[j,:] - X[1,:])*AA.T
           
    return X_new                    
            
'''危险更新'''   
def SDUpdate(X,pop,SDNumber,fitness,BestF):
    X_new = np.zeros(X.shape)
    Temp = range(pop)
    RandIndex = random.sample(Temp, pop)
    SDchooseIndex = RandIndex[0:SDNumber]
    for j in range(SDNumber):
        if fitness[SDchooseIndex[j]]>BestF:
            X_new[SDchooseIndex[j],:] = X[1,:] + np.random.randn()*np.abs(X[SDchooseIndex[j],:] - X[1,:])
        elif fitness[SDchooseIndex[j]] == BestF:
            K = 2*random.random() - 1
            X_new[SDchooseIndex[j],:] = X[SDchooseIndex[j],:] + K*(np.abs( X[SDchooseIndex[j],:] - X[-1,:])/(fitness[SDchooseIndex[j]] - fitness[-1] + 10E-8))
    return X_new


'''麻雀搜索算法'''
def SSA(pop,dim,lb,ub,Max_iter,fun):
    ST = 0.6 #预警值
    PD = 0.7 #发现者的比列,剩下的是加入者
    SD = 0.2 #意识到有危险麻雀的比重
    PDNumber = int(pop*PD) #发现者数量
    SDNumber = int(pop*SD) #意识到有危险麻雀数量
    X,lb,ub = initial(pop, dim, ub, lb) #初始化种群
    fitness = CaculateFitness(X,fun) #计算适应度值
    fitness,sortIndex = SortFitness(fitness) #对适应度值排序
    X = SortPosition(X,sortIndex) #种群排序
    GbestScore = fitness[0]
    GbestPositon = np.zeros([1,dim])
    GbestPositon[0,:] = X[0,:]
    Curve = np.zeros([MaxIter,1])
    for i in range(MaxIter):
        
        BestF = fitness[0]
        
        X = PDUpdate(X,PDNumber,ST,Max_iter)#发现者更新
        
        X = JDUpdate(X,PDNumber,pop,dim) #加入者更新
        
        X = SDUpdate(X,pop,SDNumber,fitness,BestF) #危险更新
        
        X = BorderCheck(X,ub,lb,pop,dim) #边界检测
        fitness = CaculateFitness(X,fun) #计算适应度值
        fitness,sortIndex = SortFitness(fitness) #对适应度值排序
        X = SortPosition(X,sortIndex) #种群排序
        if(fitness[0]<=GbestScore): #更新全局最优
            GbestScore = fitness[0]
            GbestPositon[0,:] = X[0,:]
        Curve[i] = GbestScore
    
    return GbestScore,GbestPositon,Curve
    



'''主函数 '''
path= '../其他方法/chapter_WineClass.mat'  # 特征数据和标签数据
data = scio.loadmat(path)
winedata = np.transpose(data['wine']) # 转置
labeldata = np.transpose(data['wine_labels'])

# 选定训练集和测试集
# 将第一类的1-30,第二类的60-95,第三类的131-153做为训练集
# np.hstack()将参数元组的元素数组按水平方向进行叠加
train_wine=np.hstack((winedata[:,0:29],winedata[:,59:94],winedata[:,130:152]))
train_label = np.hstack((labeldata[:,0:29],labeldata[:,59:94],labeldata[:,130:152]))
#
# 将第一类的31-59,第二类的96-130,第三类的154-178做为测试集
test_wine = np.hstack((winedata[:,30:58],winedata[:,95:129],winedata[:,153:177]));
test_label = np.hstack((labeldata[:,30:58],labeldata[:,95:129],labeldata[:,153:177]))

# 数据归一化
min_max_scaler = preprocessing.MinMaxScaler() #
train_wine = min_max_scaler.fit_transform(train_wine)
test_wine = min_max_scaler.fit_transform(test_wine)

#转换为行向量
train_wine = train_wine.T
test_wine = test_wine.T
train_label = train_label.T
test_label = test_label.T


#设置麻雀参数
pop = 2 #种群数量
MaxIter = 2 #最大迭代次数
dim = 2 #维度
lb = np.matrix([[0.1],[0.1]]) #下边界
ub = np.matrix([[200],[200]])#上边界

GbestScore,GbestPositon,Curve = SSA(pop,dim,lb,ub,MaxIter,fun) 
print('最优适应度值:',GbestScore)
print('c,g最优解:',GbestPositon)
#利用最终优化的结果计算分类正确率等信息
#训练svm分类器
classifier=svm.SVC(C=GbestPositon[0,0],kernel='rbf',gamma=GbestPositon[0,1]) # ovr:一对多策略
classifier.fit(train_wine,train_label.ravel()) #ravel函数在降维时默认是行序优先
#4.计算svc分类器的准确率
tra_label=classifier.predict(train_wine) #训练集的预测标签
tes_label=classifier.predict(test_wine) #测试集的预测标签
print("训练集准确率:", accuracy_score(train_label,tra_label) )
print("测试集准确率:", accuracy_score(test_label,tes_label) )



plt.figure(1)
plt.plot(test_label,'*',label = "True")
plt.plot(tes_label,'o',label = "predict")
plt.xlabel("Test Case")
plt.ylabel("Case Label")
plt.grid(True)
plt.legend(bbox_to_anchor=(1.0, 1), loc=1, borderaxespad=0.)
plt.show()