前馈神经网络实验

  1. 手动实现前馈神经网络解决回归、二分类、多分类任务,分析实验结果并绘制训练集和测试集的loss曲线;
  2. 利用torch.nn实现前馈神经网络解决上述回归、二分类、多分类任务,分析实验结果并绘制训练集和测试集的loss曲线;
  3. 在多分类实验的基础上使用至少三种不同的激活函数,对比使用不同激活函数的实验结果;
  4. 对多分类任务中的模型,评估隐藏层层数和隐藏单元个数对实验结果的影响:使用不同的隐藏层层数和隐藏单元个数进行对比实验,并分析实验结果;
  5. 在多分类任务实验中分别手动实现和用torch.nn实现dropout,探究不同丢弃率对实验结果的影响(可用loss曲线进行展示);
  6. 在多分类任务实验中分别手动实现和用torch.nn实现L2正则化,探究惩罚项的权重对实验结果的影响(可用loss曲线进行展示);
  7. 对回归、二分类、多分类任务分别选择上述实验中效果最好的模型,采用10折交叉验证评估实验结果,要求除了最终结果外还需以表格的形式展示每折的实验结果;

本次实验使用的环境为:在 PyCharm 中配置的 Python 3.9.7 以及 PyTorch 1.10。

1)回归任务的数据集

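回归任务使用单个数据集,数据集的大小为10000,其中训练集大小为7000,测试集大小为3000。数据集的样本特征维度p为500,且标签服从如下的高维线性函数:$y = 0.028 + \sum_{i=1}^{p} 0.0056\,x_i + \epsilon$,其中噪声项 $\epsilon$ 服从均值为0、标准差为0.01的正态分布。通过键入下面代码,构造该数据集。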

import torch
import numpy as np

# Regression dataset: y = 0.028 + sum_i 0.0056 * x_i + noise, noise ~ N(0, 0.01).
num_inputs = 500        # feature dimension p
num_examples = 10000    # total number of samples
num_train = 7000        # the first num_train samples form the training set

# Features are drawn i.i.d. from a standard normal distribution.
x_features = torch.tensor(np.random.normal(0, 1, (num_examples, num_inputs)), dtype=torch.float)
# The weight vector is sized from num_inputs so it always matches the features.
y_labels = torch.mm(x_features, torch.ones(num_inputs, 1) * 0.0056) + 0.028
y_labels += torch.tensor(np.random.normal(0, 0.01, size=y_labels.size()), dtype=torch.float)
# Training set
trainfeatures = x_features[:num_train]
trainlabels = y_labels[:num_train]
# Test set
testfeatures = x_features[num_train:]
testlabels = y_labels[num_train:]

2)二分类任务的数据集

二分类任务的数据集由两个数据集构成,两个数据集的大小均为10000且训练集大小为7000,测试集大小为3000。两个数据集的样本特征x的维度均为200,且分别服从均值互为相反数且方差相同的正态分布。两个数据集的样本标签分别为0和1。通过键入下面代码,构造该数据集。

import torch
from matplotlib import pyplot as plt

# Binary-classification dataset: two Gaussian clouds with opposite means.
num_inputs = 200
# Class 1: features ~ N(1, 1), label 1
x1 = torch.normal(1, 1, (10000, num_inputs))
y1 = torch.ones(10000, 1)
x1_train, x1_test = x1[:7000], x1[7000:]
# Class 0: features ~ N(-1, 1), label 0
x2 = torch.normal(-1, 1, (10000, num_inputs))
y2 = torch.zeros(10000, 1)
x2_train, x2_test = x2[:7000], x2[7000:]

# Merge the two classes into one training set
train_features = torch.cat([x1_train, x2_train], dim=0).float()
train_labels = torch.cat([y1[:7000], y2[:7000]], dim=0).float()
# Merge the two classes into one test set
test_features = torch.cat([x1_test, x2_test], dim=0).float()
test_labels = torch.cat([y1[7000:], y2[7000:]], dim=0).float()

# Scatter plot of the first two feature dimensions, coloured by label.
_train_np = train_features.data.numpy()
plt.scatter(
    _train_np[:, 0],
    _train_np[:, 1],
    c=train_labels.data.numpy(),
    s=100,
    lw=0,
    cmap='RdYlGn',
)
plt.show()

3)多分类数据集

多分类数据集为MNIST手写体数据集,该数据集包含60000个用于训练的图像样本和10000个用于测试的图像样本。图像大小固定(28×28像素),像素值在0到1之间。每个图像都被展平并转换为包含784个特征的一维numpy数组。通过键入下面代码,下载MNIST手写体数据集。

import torch                                            #导入pytorch框架
import torchvision
import torchvision.transforms as transforms

## Load the data
# Fetch the MNIST dataset; the ToTensor transform turns every image into a Tensor.
mnist_train = torchvision.datasets.MNIST(
    root='~/Datasets/MNIST',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.MNIST(
    root='~/Datasets/MNIST',
    train=False,
    transform=transforms.ToTensor(),
)

# Read mini-batches through DataLoader (only the training set is shuffled)
batch_size = 128
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)

1、手动实现前馈神经网络解决回归、二分类、多分类任务,分析实验结果并绘制训练集和测试集的loss曲线

(1)回归

## 导入实验所需的包
import numpy as np
import torch
from torch.utils import data
from matplotlib import pyplot as plt

## Part 1: build the regression dataset
n_train, n_test, num_inputs = 7000, 3000, 500
# Ground truth y = X w + b with every weight equal to 0.0056 and b = 0.028,
# which is the linear function the regression task is specified with.
true_w, true_b = torch.ones(num_inputs, 1) * 0.0056, 0.028
features = torch.randn((n_train + n_test, num_inputs))
labels = torch.matmul(features, true_w) + true_b
# Additive Gaussian noise (std 0.01), created as float32 to match the labels.
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float32)
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:, :]
print(train_features.shape)  # sanity-check the training shape
print(test_features.shape)   # sanity-check the test shape

## Part 2: define the data iterators
dataset_train = data.TensorDataset(train_features, train_labels)
dataset_test = data.TensorDataset(test_features, test_labels)
batch_size = 50
train_iter = data.DataLoader(dataset=dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
# Batch order does not matter for evaluation, so the test set is not shuffled.
test_iter = data.DataLoader(dataset=dataset_test, batch_size=batch_size, shuffle=False, num_workers=0)

## Part 3: define the model and its forward pass
class Net():
    """Single-hidden-layer feed-forward network built from raw tensors.

    The forward pass computes ``ReLU(x @ W1.T + b1) @ W2.T + b2``.

    Args:
        num_inputs: Number of input features per sample.
        num_outputs: Number of output units.
        num_hiddens: Number of hidden units.

    Attributes:
        params: ``[W1, b1, W2, b2]``; every tensor has ``requires_grad=True``.
    """

    def __init__(self, num_inputs=500, num_outputs=1, num_hiddens=256):
        # Weights are drawn from N(0, 0.01). Each layer gets one bias per
        # unit (not a single shared scalar), initialised to zero.
        W1 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_inputs)), dtype=torch.float32)
        b1 = torch.zeros(num_hiddens, dtype=torch.float32)
        W2 = torch.tensor(np.random.normal(0, 0.01, (num_outputs, num_hiddens)), dtype=torch.float32)
        b2 = torch.zeros(num_outputs, dtype=torch.float32)
        # Enable gradient tracking on all four parameters.
        self.params = [W1, b1, W2, b2]
        for param in self.params:
            param.requires_grad_(requires_grad=True)
        self.W1, self.b1, self.W2, self.b2 = W1, b1, W2, b2

    def inputs_layer(self, x):
        """Flatten every sample to one row: ``(batch, -1)``."""
        return x.view(x.shape[0], -1)

    def hiddens_layer(self, x):
        """Apply the hidden affine map followed by ReLU."""
        return self.my_ReLU(torch.matmul(x, self.W1.t()) + self.b1)

    def outputs_layer(self, x):
        """Apply the output affine map (no activation)."""
        return torch.matmul(x, self.W2.t()) + self.b2

    @staticmethod
    def my_ReLU(x):
        """Return the element-wise maximum of ``x`` and zero."""
        return torch.max(input=x, other=torch.tensor(0.0))

    def forward(self, x):
        """Run the full forward pass and return the output-layer values."""
        flatten_input = self.inputs_layer(x)
        hidden_output = self.hiddens_layer(flatten_input)
        final_output = self.outputs_layer(hidden_output)
        return final_output

## Part 4: define the loss function and the optimisation algorithm
loss_func = torch.nn.MSELoss()


def SGD (params,lr):
    """Apply one plain gradient-descent step in place: ``p <- p - lr * p.grad``.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate (step size).

    Parameters whose ``.grad`` is ``None`` (no backward pass has reached them)
    are left untouched instead of raising.
    """
    # no_grad keeps the update itself out of the autograd graph without
    # going through the legacy ``.data`` attribute.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad

## Part 5: define the evaluation function
def test(data_iter,net,loss_func) :
    """Return the mean per-batch loss of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Model exposing ``forward(X)``.
        loss_func: Callable ``loss_func(prediction, y)`` returning a scalar tensor.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    test_loss_sum, c = 0.0, 0
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            result = net.forward(X)
            test_loss_sum += loss_func(result, y).item()
            c += 1
    return test_loss_sum / c


# The name starts with "test", so opt out of pytest collection explicitly.
test.__test__ = False

## Part 6: define the training function
def train(net, train_iter,loss_func, num_epochs,batch_size,lr=None,optimizer=None):
    """Train ``net`` with mini-batch updates and record per-epoch losses.

    After every epoch the model is evaluated with ``test`` on the
    module-level ``test_iter``.

    Args:
        net: Model exposing ``forward(X)`` and a ``params`` list.
        train_iter: Iterable of ``(X, y)`` training mini-batches.
        loss_func: Callable ``loss_func(y_hat, y)`` returning a scalar tensor.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Not used by this function; kept so existing calls still work.
        lr: Learning rate forwarded to ``optimizer``.
        optimizer: Callable ``optimizer(params, lr)`` that updates ``params`` in place.

    Returns:
        ``(train_loss_list, test_loss_list)``, one mean batch loss per epoch each.
    """
    train_loss_list = []
    test_loss_list = []
    for epoch in range(num_epochs):
        train_l_sum, c = 0.0, 0
        for X, y in train_iter:  # X, y: features and labels of one mini-batch
            y_hat = net.forward(X)
            l = loss_func(y_hat, y)
            l.backward()
            optimizer(net.params, lr)
            # Clear the gradients so they do not accumulate into the next step.
            for param in net.params:
                param.grad.zero_()
            train_l_sum += l.item()
            c += 1
        test_loss = test(test_iter, net, loss_func)
        train_loss_list.append(train_l_sum / c)
        test_loss_list.append(test_loss)
        print('epoch %d, train_loss %.4f,test_loss %.4f'%(epoch+1, train_l_sum/c,test_loss))
    return train_loss_list, test_loss_list


## Part 7: visualise the results
def draw_loss(train_loss, test_loss,valid_loss=None):
    """Plot per-epoch loss curves and show the figure.

    Args:
        train_loss: Sequence of training losses, one value per epoch.
        test_loss: Sequence of test losses, one value per epoch.
        valid_loss: Optional sequence of validation losses; drawn as a
            third curve when given.
    """
    curves = [(train_loss, "Train_Loss"), (test_loss, "Test_Loss")]
    if valid_loss is not None:
        curves.append((valid_loss, "Valid_loss"))
    for values, label in curves:
        # Each curve is drawn against its own 1-based epoch index.
        epochs = np.arange(1, len(values) + 1)
        plt.plot(epochs, values, label=label, linewidth=1.5)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

## Part 8: train the model
num_epochs, lr = 100, 0.003   # number of epochs and learning rate
optimizer = SGD               # the hand-written gradient-descent step
net = Net()
train_loss, test_loss = train(
    net, train_iter, loss_func, num_epochs, batch_size, lr=lr, optimizer=optimizer)
draw_loss(train_loss, test_loss)

(2)二分类

## 导入实验所需的包
import numpy as np
import torch
from torch.utils import data
from matplotlib import pyplot as plt

## Part 1: build the binary-classification dataset
n_train, n_test, num_inputs = 7000, 3000, 200
# Dataset 1: features ~ N(1, 1), label 0
x0 = torch.normal(1, 1, (n_train + n_test, num_inputs))
y0 = torch.zeros(n_train + n_test, 1)
# Dataset 2: features ~ N(-1, 1), label 1
x1 = torch.normal(-1, 1, (n_train + n_test, num_inputs))
y1 = torch.ones(n_train + n_test, 1)
# The first n_train rows of each class form the training set, the rest the test set.
train_features = torch.cat([x0[:n_train], x1[:n_train]], dim=0).float()
train_labels = torch.cat([y0[:n_train], y1[:n_train]], dim=0).float()
test_features = torch.cat([x0[n_train:], x1[n_train:]], dim=0).float()
test_labels = torch.cat([y0[n_train:], y1[n_train:]], dim=0).float()
print(train_features.shape,train_labels.shape,test_features.shape,test_labels.shape)

## Part 2: define the data iterators
batch_size = 50
dataset_train = data.TensorDataset(train_features, train_labels)
dataset_test = data.TensorDataset(test_features, test_labels)
train_iter = data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = data.DataLoader(dataset_test, batch_size=batch_size, shuffle=True, num_workers=0)

## Part 3: define the model and its forward pass
class Net():
    """Single-hidden-layer feed-forward network built from raw tensors.

    The forward pass computes ``ReLU(x @ W1.T + b1) @ W2.T + b2`` and returns
    the raw output-layer values (no sigmoid is applied here).

    Args:
        num_inputs: Number of input features per sample.
        num_outputs: Number of output units.
        num_hiddens: Number of hidden units.

    Attributes:
        params: ``[W1, b1, W2, b2]``; every tensor has ``requires_grad=True``.
    """

    def __init__(self, num_inputs=200, num_outputs=1, num_hiddens=256):
        # Weights are drawn from N(0, 0.01). Each layer gets one bias per
        # unit (not a single shared scalar), initialised to zero.
        W1 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_inputs)), dtype=torch.float32)
        b1 = torch.zeros(num_hiddens, dtype=torch.float32)
        W2 = torch.tensor(np.random.normal(0, 0.01, (num_outputs, num_hiddens)), dtype=torch.float32)
        b2 = torch.zeros(num_outputs, dtype=torch.float32)
        # Enable gradient tracking on all four parameters.
        self.params = [W1, b1, W2, b2]
        for param in self.params:
            param.requires_grad_(requires_grad=True)
        self.W1, self.b1, self.W2, self.b2 = W1, b1, W2, b2

    def inputs_layer(self, x):
        """Flatten every sample to one row: ``(batch, -1)``."""
        return x.view(x.shape[0], -1)

    def hiddens_layer(self, x):
        """Apply the hidden affine map followed by ReLU."""
        return self.my_ReLU(torch.matmul(x, self.W1.t()) + self.b1)

    def outputs_layer(self, x):
        """Apply the output affine map (no activation)."""
        return torch.matmul(x, self.W2.t()) + self.b2

    @staticmethod
    def my_ReLU(x):
        """Return the element-wise maximum of ``x`` and zero."""
        return torch.max(input=x, other=torch.tensor(0.0))

    def forward(self, x):
        """Run the full forward pass and return the output-layer values."""
        flatten_input = self.inputs_layer(x)
        hidden_output = self.hiddens_layer(flatten_input)
        final_output = self.outputs_layer(hidden_output)
        return final_output

## Part 4: define the loss function and the optimisation algorithm
loss_func = torch.nn.BCEWithLogitsLoss()


def SGD (params,lr):
    """Apply one plain gradient-descent step in place: ``p <- p - lr * p.grad``.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate (step size).

    Parameters whose ``.grad`` is ``None`` (no backward pass has reached them)
    are left untouched instead of raising.
    """
    # no_grad keeps the update itself out of the autograd graph without
    # going through the legacy ``.data`` attribute.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad

## Part 5: define the evaluation function
def test(data_iter,net,loss_func) :
    """Return the mean per-batch loss of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Model exposing ``forward(X)``.
        loss_func: Callable ``loss_func(prediction, y)`` returning a scalar tensor.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    test_loss_sum, c = 0.0, 0
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            result = net.forward(X)
            test_loss_sum += loss_func(result, y).item()
            c += 1
    return test_loss_sum / c


# The name starts with "test", so opt out of pytest collection explicitly.
test.__test__ = False

## Part 6: define the training function
def train(net, train_iter,loss_func, num_epochs,batch_size,lr=None,optimizer=None):
    """Train ``net`` with mini-batch updates and record per-epoch losses.

    After every epoch the model is evaluated with ``test`` on the
    module-level ``test_iter``.

    Args:
        net: Model exposing ``forward(X)`` and a ``params`` list.
        train_iter: Iterable of ``(X, y)`` training mini-batches.
        loss_func: Callable ``loss_func(y_hat, y)`` returning a scalar tensor.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Not used by this function; kept so existing calls still work.
        lr: Learning rate forwarded to ``optimizer``.
        optimizer: Callable ``optimizer(params, lr)`` that updates ``params`` in place.

    Returns:
        ``(train_loss_list, test_loss_list)``, one mean batch loss per epoch each.
    """
    train_loss_list = []
    test_loss_list = []
    for epoch in range(num_epochs):
        train_l_sum, c = 0.0, 0
        for X, y in train_iter:  # X, y: features and labels of one mini-batch
            y_hat = net.forward(X)
            l = loss_func(y_hat, y)
            l.backward()
            optimizer(net.params, lr)
            # Clear the gradients so they do not accumulate into the next step.
            for param in net.params:
                param.grad.zero_()
            train_l_sum += l.item()
            c += 1
        test_loss = test(test_iter, net, loss_func)
        train_loss_list.append(train_l_sum / c)
        test_loss_list.append(test_loss)
        print('epoch %d, train_loss %.4f,test_loss %.4f'%(epoch+1, train_l_sum/c,test_loss))
    return train_loss_list, test_loss_list

## Part 7: visualise the results
def draw_loss(train_loss, test_loss,valid_loss=None):
    """Plot per-epoch loss curves and show the figure.

    Args:
        train_loss: Sequence of training losses, one value per epoch.
        test_loss: Sequence of test losses, one value per epoch.
        valid_loss: Optional sequence of validation losses; drawn as a
            third curve when given.
    """
    curves = [(train_loss, "Train_Loss"), (test_loss, "Test_Loss")]
    if valid_loss is not None:
        curves.append((valid_loss, "Valid_loss"))
    for values, label in curves:
        # Each curve is drawn against its own 1-based epoch index.
        epochs = np.arange(1, len(values) + 1)
        plt.plot(epochs, values, label=label, linewidth=1.5)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

## Part 8: train the model
num_epochs, lr = 100, 0.003   # number of epochs and learning rate
optimizer = SGD               # the hand-written gradient-descent step
net = Net()
train_loss, test_loss = train(
    net, train_iter, loss_func, num_epochs, batch_size, lr=lr, optimizer=optimizer)
draw_loss(train_loss, test_loss)

(3)多分类

## 导入实验所需的包
import numpy as np
import torch
from torch.utils import data
from matplotlib import pyplot as plt
import torchvision
import torchvision.transforms as transforms

## Part 1: load the multi-class (MNIST) dataset
mnist_train = torchvision.datasets.MNIST(
    root='~/Datasets/MNIST',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
mnist_test = torchvision.datasets.MNIST(
    root='~/Datasets/MNIST',
    train=False,
    transform=transforms.ToTensor(),
)

## Part 2: define the data iterators
# Read mini-batches through DataLoader (only the training set is shuffled)
batch_size = 128
train_iter = torch.utils.data.DataLoader(
    mnist_train, batch_size=batch_size, shuffle=True, num_workers=0)
test_iter = torch.utils.data.DataLoader(
    mnist_test, batch_size=batch_size, shuffle=False, num_workers=0)

## Part 3: define the model and its forward pass
class Net():
    """Single-hidden-layer feed-forward network built from raw tensors.

    The forward pass computes ``ReLU(x @ W1.T + b1) @ W2.T + b2`` and returns
    the raw output-layer values (no softmax is applied here).

    Args:
        num_inputs: Number of input features per flattened sample.
        num_outputs: Number of output units.
        num_hiddens: Number of hidden units.

    Attributes:
        params: ``[W1, b1, W2, b2]``; every tensor has ``requires_grad=True``.
    """

    def __init__(self, num_inputs=784, num_outputs=10, num_hiddens=256):
        # Weights are drawn from N(0, 0.01). Each layer gets one bias per
        # unit, initialised to zero, so every output unit has its own
        # offset instead of sharing a single scalar.
        W1 = torch.tensor(np.random.normal(0, 0.01, (num_hiddens, num_inputs)), dtype=torch.float32)
        b1 = torch.zeros(num_hiddens, dtype=torch.float32)
        W2 = torch.tensor(np.random.normal(0, 0.01, (num_outputs, num_hiddens)), dtype=torch.float32)
        b2 = torch.zeros(num_outputs, dtype=torch.float32)
        # Enable gradient tracking on all four parameters.
        self.params = [W1, b1, W2, b2]
        for param in self.params:
            param.requires_grad_(requires_grad=True)
        self.W1, self.b1, self.W2, self.b2 = W1, b1, W2, b2

    def inputs_layer(self, x):
        """Flatten every sample to one row: ``(batch, -1)``."""
        return x.view(x.shape[0], -1)

    def hiddens_layer(self, x):
        """Apply the hidden affine map followed by ReLU."""
        return self.my_ReLU(torch.matmul(x, self.W1.t()) + self.b1)

    def outputs_layer(self, x):
        """Apply the output affine map (no activation)."""
        return torch.matmul(x, self.W2.t()) + self.b2

    @staticmethod
    def my_ReLU(x):
        """Return the element-wise maximum of ``x`` and zero."""
        return torch.max(input=x, other=torch.tensor(0.0))

    def forward(self, x):
        """Run the full forward pass and return the output-layer values."""
        flatten_input = self.inputs_layer(x)
        hidden_output = self.hiddens_layer(flatten_input)
        final_output = self.outputs_layer(hidden_output)
        return final_output

## Part 4: define the loss function and the optimisation algorithm
loss_func = torch.nn.CrossEntropyLoss()


def SGD (params,lr):
    """Apply one plain gradient-descent step in place: ``p <- p - lr * p.grad``.

    Args:
        params: Iterable of tensors to update.
        lr: Learning rate (step size).

    Parameters whose ``.grad`` is ``None`` (no backward pass has reached them)
    are left untouched instead of raising.
    """
    # no_grad keeps the update itself out of the autograd graph without
    # going through the legacy ``.data`` attribute.
    with torch.no_grad():
        for param in params:
            if param.grad is None:
                continue
            param -= lr * param.grad

## Part 5: define the evaluation function
def test(data_iter,net,loss_func) :
    """Return the mean per-batch loss of ``net`` over ``data_iter``.

    Args:
        data_iter: Iterable yielding ``(X, y)`` mini-batches.
        net: Model exposing ``forward(X)``.
        loss_func: Callable ``loss_func(prediction, y)`` returning a scalar tensor.

    Returns:
        The sum of the batch losses divided by the number of batches.
    """
    test_loss_sum, c = 0.0, 0
    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for X, y in data_iter:
            result = net.forward(X)
            test_loss_sum += loss_func(result, y).item()
            c += 1
    return test_loss_sum / c


# The name starts with "test", so opt out of pytest collection explicitly.
test.__test__ = False

## Part 6: define the training function
def train(net,train_iter,loss_func, num_epochs,batch_size,lr=None,optimizer=None):
    """Train ``net`` with mini-batch updates and record per-epoch losses.

    After every epoch the model is evaluated with ``test`` on the
    module-level ``test_iter``.

    Args:
        net: Model exposing ``forward(X)`` and a ``params`` list.
        train_iter: Iterable of ``(X, y)`` training mini-batches.
        loss_func: Callable ``loss_func(y_hat, y)`` returning a scalar tensor.
        num_epochs: Number of passes over ``train_iter``.
        batch_size: Not used by this function; kept so existing calls still work.
        lr: Learning rate forwarded to ``optimizer``.
        optimizer: Callable ``optimizer(params, lr)`` that updates ``params`` in place.

    Returns:
        ``(train_loss_list, test_loss_list)``, one mean batch loss per epoch each.
    """
    train_loss_list = []
    test_loss_list = []
    for epoch in range(num_epochs):
        train_l_sum, c = 0.0, 0
        for X, y in train_iter:  # X, y: features and labels of one mini-batch
            y_hat = net.forward(X)
            l = loss_func(y_hat, y)
            l.backward()
            optimizer(net.params, lr)
            # Clear the gradients so they do not accumulate into the next step.
            for param in net.params:
                param.grad.zero_()
            train_l_sum += l.item()
            c += 1
        test_loss = test(test_iter, net, loss_func)
        train_loss_list.append(train_l_sum / c)
        test_loss_list.append(test_loss)
        print('epoch %d, train_loss %.4f,test_loss %.4f'%(epoch+1, train_l_sum/c,test_loss))
    return train_loss_list, test_loss_list

## Part 7: visualise the results
def draw_loss(train_loss, test_loss,valid_loss=None):
    """Plot per-epoch loss curves and show the figure.

    Args:
        train_loss: Sequence of training losses, one value per epoch.
        test_loss: Sequence of test losses, one value per epoch.
        valid_loss: Optional sequence of validation losses; drawn as a
            third curve when given.
    """
    curves = [(train_loss, "Train_Loss"), (test_loss, "Test_Loss")]
    if valid_loss is not None:
        curves.append((valid_loss, "Valid_loss"))
    for values, label in curves:
        # Each curve is drawn against its own 1-based epoch index.
        epochs = np.arange(1, len(values) + 1)
        plt.plot(epochs, values, label=label, linewidth=1.5)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

## Part 8: train the model
num_epochs, lr = 100, 0.03    # number of epochs and learning rate
optimizer = SGD               # the hand-written gradient-descent step
net = Net()
train_loss, test_loss = train(
    net, train_iter, loss_func, num_epochs, batch_size, lr=lr, optimizer=optimizer)
draw_loss(train_loss, test_loss)

代码实在是太多了,具体代码见:🍞正在装饰个人主页...