Math Problem Statement
import torch import torch.nn as nn import torch.optim as optim import numpy as np import pandas as pd import random from sklearn.datasets import load_boston from sklearn.model_selection import train_test_split from sklearn.preprocessing import StandardScaler
# Define the neural network
class NeuralNetwork(nn.Module):
    """Small fully connected regressor (n_features -> 3 -> 3 -> 1).

    The layer weights are seeded from ``W`` instead of PyTorch's default
    initialisation; the biases keep the default ``nn.Linear`` initialisation.

    Args:
        W: Indexable container of three 2-D arrays (for example one PSO
            particle of shape ``(3, n_features, 3)``).
            ``W[0]`` has shape ``(n_features, 3)`` and seeds the first layer,
            ``W[1][:3, :]`` must be ``(3, 3)`` and seeds the second layer,
            ``W[2][:3, 0]`` supplies the three output-layer weights.
    """

    def __init__(self, W):
        super().__init__()

        first = np.asarray(W[0], dtype=np.float32)
        second = np.asarray(W[1], dtype=np.float32)[:3, :]
        third = np.asarray(W[2], dtype=np.float32)[:3, 0]

        # Layer sizes: the input width comes from the rows of W[0].
        self.fc1 = nn.Linear(first.shape[0], 3)
        self.fc2 = nn.Linear(3, 3)
        self.fc3 = nn.Linear(3, 1)

        # Assign the given weights to the network. nn.Linear stores its weight
        # as (out_features, in_features), hence the transpose for the first
        # layer and the (1, 3) shape for the output layer.
        self.fc1.weight = nn.Parameter(torch.tensor(first.T, dtype=torch.float32))
        self.fc2.weight = nn.Parameter(torch.tensor(second, dtype=torch.float32))
        self.fc3.weight = nn.Parameter(
            torch.tensor(third.reshape(1, 3), dtype=torch.float32)
        )

    def forward(self, x):
        """Return the network output for a batch ``x`` of shape (N, n_features)."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x
def function(x1, y1, W, *, epochs=1000, lr=0.1):
    """Objective function for the particle swarm: loss after SGD fine-tuning.

    A ``NeuralNetwork`` is built with ``W`` as its starting weights, trained
    with full-batch SGD on ``(x1, y1)``, and the final MSE is returned. The
    swarm therefore searches over *initial* weights, while SGD performs the
    actual fitting; the trained weights themselves are discarded and only the
    resulting loss is reported back.

    Args:
        x1: Feature matrix, array-like convertible to a float32 tensor.
        y1: Target values, array-like convertible to a float32 tensor.
        W: Weight container passed to ``NeuralNetwork``.
        epochs: Number of SGD steps (default 1000, the original setting).
        lr: SGD learning rate (default 0.1, the original setting).

    Returns:
        The mean squared error on ``(x1, y1)`` after training, as a float.
    """
    # Convert the data to tensors
    x1 = torch.tensor(x1, dtype=torch.float32)
    y1 = torch.tensor(y1, dtype=torch.float32)

    # Initialise the network from the particle position
    net = NeuralNetwork(W)

    # Loss function and optimiser
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)

    # Train the model
    print("开始")
    for _ in range(epochs):
        optimizer.zero_grad()            # reset gradients
        outputs = net(x1)                # forward pass
        loss = criterion(outputs, y1)    # compute loss
        loss.backward()                  # backward pass
        optimizer.step()                 # update weights

    # Loss at the end of training, evaluated without tracking gradients
    with torch.no_grad():
        end_loss = criterion(net(x1), y1).item()
    print(end_loss)
    print("结束")
    return end_loss
# Load the dataset
# NOTE(review): load_boston was removed in scikit-learn 1.2; this script needs
# an older scikit-learn release or a replacement data source.
data = load_boston()
data_pd = pd.DataFrame(data.data, columns=data.feature_names)
data_pd["price"] = data.target

# Move the DataFrame into NumPy arrays; the target becomes a column vector
# without hard-coding the number of samples.
x = np.array(data_pd.loc[:, 'CRIM':'LSTAT'])
y = np.array(data_pd.loc[:, 'price']).reshape(-1, 1)

# Train / test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

# Standardise the data. Features and target get separate scalers, each fitted
# on the training split only and then applied to the test split.
sc = StandardScaler()
sc_y = StandardScaler()
x_train = sc.fit_transform(x_train)
y_train = sc_y.fit_transform(y_train)
x_test = sc.transform(x_test)
y_test = sc_y.transform(y_test)
# NOTE(review): x_test / y_test are not used by the optimisation below.

# Number of particles
num = 3

# Shape of one particle's position: num_x weight matrices of num_y x num_z,
# where num_y follows the number of input features.
num_x = 3
num_y = x_train.shape[1]
num_z = 3

# p holds the particle positions, initialised from a standard normal
p = np.random.randn(num, num_x, num_y, num_z)

# Particle velocities, also initialised from a standard normal
v = np.random.randn(num, num_x, num_y, num_z)

# Best position found so far by each particle
good_p = np.array(p, copy=True)

# Best position found so far by the whole swarm
best_p = np.zeros((num_x, num_y, num_z))

# Objective values computed after each move
new_y = np.zeros(num)

# Best objective value seen by each particle
good_y = np.zeros(num)

# Best objective value seen by the swarm
best_y = 0

# Objective value of every particle in the initial swarm
for i in range(num):
    good_y[i] = function(x_train, y_train, p[i, :, :, :])

# The objective is an error, so the smallest value is the best
best_y = min(good_y)

# Best initial position (copied so it never aliases the position array)
best_p = p[np.argmin(good_y), :, :, :].copy()

# Maximum number of iterations
max_iter = 10

# Main PSO loop
for _ in range(max_iter):
    # Velocity update: inertia + pull towards the global best + pull towards
    # each particle's own best, each scaled by one random scalar per iteration.
    v = random.random() * v + 2.4 * random.random() * (best_p - p) + 1.7 * random.random() * (good_p - p)

    # Position update
    p = p + v

    # Objective value of every particle at its new position
    for i in range(num):
        new_y[i] = function(x_train, y_train, p[i, :, :, :])

    # Update the global best
    if min(new_y) < best_y:
        best_y = min(new_y)
        best_p = p[np.argmin(new_y), :, :, :].copy()

    # Update each particle's personal best
    improved = new_y < good_y
    good_y[improved] = new_y[improved]
    good_p[improved] = p[improved]

print("结束")
print('目标函数最优值:', best_y)
print('此时的粒子位置:', best_p)

# Question from the original post: since PSO is being used to optimise the
# neural network weights, what is optim.SGD for, and what role does it play?
Solution
Ask a new question for Free
By Image
Drop file here or Click Here to upload
Math Problem Analysis
Mathematical Concepts
Optimization Algorithms
Particle Swarm Optimization (PSO)
Stochastic Gradient Descent (SGD)
Formulas
Velocity Update in PSO: v = w * v + c1 * r1 * (p_best - p) + c2 * r2 * (g_best - p)
SGD Update: θ = θ - η * ∇L(θ)
Theorems
PSO global optimization
SGD local optimization
Suitable Grade Level
Graduate Level
Related Recommendation
Stochastic Gradient Descent with Momentum: Optimization Algorithm Explained
Convergence of Adam Optimizer in Non-Convex Optimization Problems: Relationship with Gradient Variance
Gradient Descent for Quadratic Function Minimization
Optimal Step Size in Gradient Descent for Quadratic Function Minimization
DQN Training Reward Analysis: Understanding Trends and Stability