Math Problem Statement

import random

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# 定义神经网络

class NeuralNetwork(nn.Module):
    """Small fully connected regressor (n_features -> 3 -> 3 -> 1).

    The layer weights are seeded from ``W`` instead of PyTorch's default
    initialisation; the biases keep the default ``nn.Linear`` initialisation.

    Args:
        W: Indexable collection of three 2-D arrays (for example one PSO
            particle of shape ``(3, n_features, 3)``):

            * ``W[0]`` -- shape ``(n_features, 3)``; its transpose becomes the
              first layer's weight matrix.
            * ``W[1]`` -- its first three rows are used as-is as the 3x3
              weight matrix of the hidden layer.
            * ``W[2]`` -- the first three entries of column 0 become the
              output layer's weights.
    """

    def __init__(self, W):
        super().__init__()

        # Rows of W[0] correspond to input features, columns to hidden units.
        n_features = W[0].shape[0]
        self.fc1 = nn.Linear(n_features, 3)
        self.fc2 = nn.Linear(3, 3)
        self.fc3 = nn.Linear(3, 1)

        # nn.Linear stores its weight as (out_features, in_features), so the
        # supplied matrices are oriented accordingly before being installed.
        self.fc1.weight = nn.Parameter(
            torch.tensor(W[0].T, dtype=torch.float32)
        )
        self.fc2.weight = nn.Parameter(
            torch.tensor(W[1][:3, :], dtype=torch.float32)
        )
        self.fc3.weight = nn.Parameter(
            torch.tensor(W[2][:3, 0].reshape(1, 3), dtype=torch.float32)
        )

    def forward(self, x):
        """Return the network output for a batch ``x`` of shape (N, n_features)."""
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

def function(x1, y1, W, *, epochs=1000, lr=0.1):
    """PSO objective: fine-tune a network seeded with ``W`` and return its loss.

    A ``NeuralNetwork`` is built from the weights ``W``, trained with plain
    SGD on ``(x1, y1)`` for ``epochs`` full-batch steps, and the final MSE on
    the same data is returned. The trained network is local to this call:
    only the loss value is handed back, the trained weights are discarded.

    Args:
        x1: Input features, array-like convertible to a float32 tensor.
        y1: Regression targets, array-like convertible to a float32 tensor.
        W: Initial weights forwarded to ``NeuralNetwork``.
        epochs: Number of SGD steps to run (default 1000).
        lr: SGD learning rate (default 0.1).

    Returns:
        The MSE loss on ``(x1, y1)`` after training, as a Python float.
    """
    # Convert the data to float32 tensors.
    x1 = torch.as_tensor(x1, dtype=torch.float32)
    y1 = torch.as_tensor(y1, dtype=torch.float32)

    # Build the network from the candidate weights.
    net = NeuralNetwork(W)

    # Loss function and optimiser.
    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=lr)

    # Train the model.
    print("开始")
    for _ in range(epochs):
        optimizer.zero_grad()  # reset accumulated gradients
        outputs = net(x1)  # forward pass
        loss = criterion(outputs, y1)  # compute the loss
        loss.backward()  # backward pass
        optimizer.step()  # update the weights

    # Loss at the end of training, evaluated without tracking gradients.
    with torch.no_grad():
        end_loss = criterion(net(x1), y1).item()
    print(end_loss)
    print("结束")
    return end_loss

# Load the dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2; running this
# needs an older scikit-learn release or a replacement data source.
data = load_boston()
data_pd = pd.DataFrame(data.data, columns=data.feature_names)
data_pd["price"] = data.target

# Move the DataFrame columns into NumPy arrays; the target becomes a column
# vector whatever the number of samples is.
x = np.array(data_pd.loc[:, 'CRIM':'LSTAT'])
y = np.array(data_pd.loc[:, 'price']).reshape(-1, 1)

# Train / test split.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1)

# Standardise the data. Features and targets each get their own scaler, and
# the statistics are fitted on the training split only so the test split is
# transformed with the same mean and variance.
sc = StandardScaler()
sc_y = StandardScaler()
x_train = sc.fit_transform(x_train)
y_train = sc_y.fit_transform(y_train)
x_test = sc.transform(x_test)
y_test = sc_y.transform(y_test)

# Number of particles.
num = 3

# Shape of one particle's position: num_x weight matrices of num_y x num_z,
# where num_y follows the number of input features.
num_x = 3
num_y = x_train.shape[1]
num_z = 3

# Particle positions, drawn from a standard normal distribution.
p = np.random.randn(num, num_x, num_y, num_z)

# Particle velocities, drawn from a standard normal distribution.
v = np.random.randn(num, num_x, num_y, num_z)

# Best position found so far by each particle.
good_p = np.array(p, copy=True)

# Objective value of every particle after its latest move.
new_y = np.zeros(num)

# Best objective value found so far by each particle.
good_y = np.zeros(num)

# Objective value of the initial swarm.
for k in range(num):
    good_y[k] = function(x_train, y_train, p[k])

# The objective is an error, so the smallest value is the best one.
best_idx = int(np.argmin(good_y))
best_y = good_y[best_idx]

# Best position of the initial swarm (copied so it never aliases ``p``).
best_p = p[best_idx].copy()

# Maximum number of PSO iterations.
max_iter = 10

# Main PSO loop.
for _ in range(max_iter):
    # Velocity update: inertia term plus attraction towards the global best
    # and towards each particle's own best position.
    v = random.random() * v + 2.4 * random.random() * (best_p - p) + 1.7 * random.random() * (good_p - p)

    # Position update.
    p = p + v

    # Objective value of every particle at its new position.
    for k in range(num):
        new_y[k] = function(x_train, y_train, p[k])

    # Update the global best.
    best_idx = int(np.argmin(new_y))
    if new_y[best_idx] < best_y:
        best_y = new_y[best_idx]
        best_p = p[best_idx].copy()

    # Update each particle's personal best.
    improved = new_y < good_y
    good_y[improved] = new_y[improved]
    good_p[improved] = p[improved]
print("结束")
print('目标函数最优值：', best_y)
print('此时的粒子位置：', best_p)

既然使用PSO优化的神经网络权重，那么optim.SGD是做什么用的？它发挥了什么作用？

Solution

Ask a new question for Free

By Image

Drop file here or Click Here to upload

Math Problem Analysis

Mathematical Concepts

Optimization Algorithms
Particle Swarm Optimization (PSO)
Stochastic Gradient Descent (SGD)

Formulas

Velocity Update in PSO: v = w * v + c1 * r1 * (p_best - p) + c2 * r2 * (g_best - p)
SGD Update: θ = θ - η * ∇L(θ)

Theorems

PSO global optimization
SGD local optimization

Suitable Grade Level

Graduate Level

Related Recommendation

Stochastic Gradient Descent with Momentum: Optimization Algorithm Explained

Convergence of Adam Optimizer in Non-Convex Optimization Problems: Relationship with Gradient Variance

Gradient Descent for Quadratic Function Minimization

Optimal Step Size in Gradient Descent for Quadratic Function Minimization

DQN Training Reward Analysis: Understanding Trends and Stability