Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 14 additions & 2 deletions torch/csrc/distributed/c10d/ProcessGroupGloo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,12 +241,24 @@ std::chrono::milliseconds ProcessGroupGloo::AsyncWork::getTimeout() const {
namespace {
// Creates a c10::ivalue::Future whose element type mirrors the shape of
// `outputTensors` and which is constructed with the list of non-CPU devices
// that those tensors live on.
//
// outputTensors: nested list of tensors (outer vector of inner tensor vectors).
// Returns: a Future typed List[List[Tensor]] when the outer vector has more
// than one entry, otherwise a Future typed List[Tensor].
//
// NOTE(review): this text comes from a rendered diff ("14 additions & 2
// deletions"), so both the pre-change and post-change argument lines of each
// return statement appear below. Only one of each pair belongs in the real
// file; see the notes at each return.
c10::intrusive_ptr<c10::ivalue::Future> createFutureAsOutput(
const std::vector<std::vector<at::Tensor>>& outputTensors) {
// We need to set device in future construction otherwise CUDA streams in
// futures are ignored.
std::vector<at::Device> devices{};
// Walk every tensor in every inner list and record its device unless it is a
// CPU device. Devices are appended once per tensor, with no de-duplication
// here. NOTE(review): confirm the Future constructor accepts repeated devices.
for (const auto& outputTensor : outputTensors) {
for (const auto& tensor : outputTensor) {
auto device = tensor.device();
if (!device.is_cpu()) {
devices.push_back(device);
}
}
}
// More than one inner list: the future's type is a list of tensor lists.
if (outputTensors.size() > 1) {
return c10::make_intrusive<c10::ivalue::Future>(
// NOTE(review): the next line appears to be the removed one-argument form
// (type only); the two lines after it appear to be its replacement, which
// also passes `devices`.
c10::ListType::create(c10::ListType::create(c10::TensorType::get())));
c10::ListType::create(c10::ListType::create(c10::TensorType::get())),
devices);
}
// Zero or one inner list: the future's type is a flat list of tensors.
return c10::make_intrusive<c10::ivalue::Future>(
// NOTE(review): as above, the next line appears to be the removed form and the
// line after it the replacement that passes `devices`.
c10::ListType::create(c10::TensorType::get()));
c10::ListType::create(c10::TensorType::get()), devices);
}

void returnFutureWithOutput(
Expand Down
9 changes: 7 additions & 2 deletions torch/testing/_internal/distributed/distributed_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4791,7 +4791,12 @@ def _test_ddp_apply_optim_in_backward(

# Test a simple linear as well as a ResNet model.
models_to_test = [
nn.Sequential(nn.Linear(3, 3), nn.Linear(3, 3), nn.Linear(3, 3)).cuda()
nn.Sequential(nn.Linear(3, 3), nn.Linear(3, 3), nn.Linear(3, 3)).cuda(),
# run model of at least 1M parameters to hit potential race conditions in
# stream semantics
nn.Sequential(
nn.Linear(3, 1024), nn.Linear(1024, 1024), nn.Linear(1024, 3)
).cuda(),
]
if HAS_TORCHVISION:
models_to_test.append(torchvision.models.resnet50().cuda())
Expand Down Expand Up @@ -4831,7 +4836,7 @@ def _test_ddp_apply_optim_in_backward(
for i in range(8):
inp = (
torch.randn(1, 3, 1000, 1000, device="cuda")
if j == 1
if j == 2
else torch.randn(10, 3, device="cuda")
)
model(inp).sum().backward()
Expand Down