1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from mpi4py import MPI

# Initialize the MPI environment: every process (rank) runs this same script
# and participates in the gradient AllReduce below.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()        # this process's id, in [0, world_size)
world_size = comm.Get_size()  # total number of participating processes
# Dataset sharding: each rank owns a disjoint 1/world_size strided slice of
# the 1000 synthetic samples, so together the ranks cover the full dataset.
# (The original `(idx + rank) % 1000` merely rotated the indices — every rank
# still processed all 1000 samples per epoch, duplicating work.)
class CustomDataset(Dataset):
    """Synthetic image/label dataset, partitioned across MPI ranks."""

    TOTAL = 1000  # global sample count across all ranks

    def __len__(self):
        # Number of samples owned by this rank; ranks below the remainder
        # get one extra so the strided partition covers all TOTAL indices.
        return self.TOTAL // world_size + (1 if rank < self.TOTAL % world_size else 0)

    def __getitem__(self, idx):
        # Map the local index to this rank's global index (strided partition:
        # rank, rank + world_size, rank + 2*world_size, ...). The data here is
        # random, so the index only documents the intended sharding scheme.
        _global_idx = idx * world_size + rank
        # Fake sample: a 3x28x28 "image" and a 0-d class label in [0, 10).
        # A 0-d label batches to shape (B,), which nn.CrossEntropyLoss
        # requires; the original (1,)-shaped label batched to (B, 1) and
        # broke the loss computation.
        return torch.randn(3, 28, 28), torch.randint(0, 10, ()).long()


dataset = CustomDataset()
# shuffle=False keeps the per-rank partition deterministic across epochs.
dataloader = DataLoader(dataset, batch_size=32, shuffle=False)
# Model definition — fall back to CPU when no GPU is available so the script
# also runs in CPU-only environments (the original hard-coded .cuda()).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = nn.Sequential(
    nn.Conv2d(3, 16, 3), nn.ReLU(), nn.Flatten(), nn.Linear(16 * 26 * 26, 10)
).to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()

# Training loop
for epoch in range(10):
    for data, target in dataloader:
        data = data.to(device)
        # Flatten targets to (B,): CrossEntropyLoss requires class-index
        # targets of shape (B,), but the dataset may yield them as (B, 1).
        target = target.to(device).view(-1)
        optimizer.zero_grad()
        output = model(data)
        loss = loss_fn(output, target)
        loss.backward()
        # Gradient synchronization: AllReduce-sum each gradient across all
        # ranks, then divide by world_size to average — equivalent to one
        # global batch of size B * world_size.
        for param in model.parameters():
            if param.grad is None:
                # e.g. frozen or unused parameters — nothing to synchronize.
                continue
            grad_np = param.grad.detach().cpu().numpy()
            comm.Allreduce(MPI.IN_PLACE, grad_np, op=MPI.SUM)
            param.grad.data = torch.from_numpy(grad_np / world_size).to(device)
        optimizer.step()
    # Only rank 0 writes checkpoints, so concurrent ranks never clobber the
    # same file; gradients are synchronized, so all ranks hold equal weights.
    if rank == 0:
        torch.save(model.state_dict(), f"model_epoch{epoch}.pth")
Comments