# dp_demo.py

import torch
import torch.nn as nn
import torch.optim as optim

# import torch.distributed as dist

import torchvision.datasets as datasets
import torchvision.transforms as transforms

# Set up the distributed environment (left commented out; this demo uses single-process nn.DataParallel instead)

# dist.init_process_group(backend="gloo", rank=0, world_size=2)

USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda:0" if USE_CUDA else "cpu")

# Load the dataset

train_dataset = datasets.MNIST(root="./data", train=True, transform=transforms.ToTensor(), download=True)
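# Note: the images are only converted to tensors here; a common optional
# addition is transforms.Normalize((0.1307,), (0.3081,)) after ToTensor()
# (the conventional MNIST mean/std).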

# train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset, num_replicas=2, rank=0)  # set num_replicas to the number of GPUs/processes

# train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=False, sampler=train_sampler)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=False)  # shuffle=False mirrors the sampler-based loader above; shuffle=True is typical for real training

# Define the model

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 512)  # 784 = 28*28 flattened MNIST pixels
        self.fc2 = nn.Linear(512, 10)   # 10 digit classes
        self.relu = nn.ReLU()

    def forward(self, x):
        print("=============x shape: ", x.shape)
        x = x.view(-1, 784)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = Net()

# Replicate the model across all visible GPUs

model = nn.DataParallel(model).to(device)
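# nn.DataParallel scatters each input batch along dim 0, replicates the module
# onto every visible GPU for each forward pass, and gathers the outputs back
# on device[0] (cuda:0 here). With zero or one GPU it simply runs on the
# single device.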

# Define the loss function and optimizer

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

# Train the model

for epoch in range(10):
    # train_sampler.set_epoch(epoch)
    for data, target in train_loader:
        print("data shape: ", data.shape)
        data = data.to(device)
        target = target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

    print("Epoch {} completed".format(epoch))