I'm trying to write my first neural network with PyTorch. Unfortunately, I run into a problem when computing the loss. I get the following error message:
RuntimeError: Function 'LogSoftmaxBackward0' returned nan values in its 0th output.
So I tried debugging and found something strange. The input contains no NaNs or infs, which I checked with the following:
print(torch.any(torch.isnan(inputs)))
But if I print x after each individual step in the model, I see that inf values appear at some point.
training
# One optimisation step on a single (inputs, labels) pair taken from `data`.
inputs, labels = data
# NOTE: isnan() only detects NaN. It reports False for +/-inf and for finite
# but extreme magnitudes (e.g. ~1e227), both of which can still overflow to
# inf inside the model. torch.isfinite(inputs).all() plus a look at
# inputs.abs().max() is a more telling probe.
print(torch.any(torch.isnan(inputs)))
# Clear gradients left over from the previous step before the new backward pass.
optimizer.zero_grad()
outputs = model(inputs)
print(outputs)
loss = criterion(outputs, labels)
print(f"epoch: {epoch + 1} loss: {loss.item()}")
# Backpropagate first, then apply the parameter update; these are two
# separate statements.
loss.backward()
optimizer.step()
model
class Net(Module):
    """Small 1-D convolutional network that maps a signal to 16 output values.

    Pipeline: Conv1d(1 -> 5, kernel 5, stride 2) -> ReLU ->
    Conv1d(5 -> 1, kernel 2) -> MaxPool1d(2) applied seven times ->
    Linear(1350 -> 16).

    All layers are created with ``dtype=torch.float64``, so the input is
    expected to be float64 as well. The final linear layer requires the
    pooled sequence (last dimension) to contain exactly 1350 elements.

    Every stage prints its output, which helps locate the first stage at
    which non-finite values appear.

    NOTE: nothing in this network rescales its input. Finite but extreme
    input magnitudes (around 1e227 in the posted run) overflow to +/-inf in
    the first convolution, and the inf values then propagate to the loss.
    Normalise the data before feeding it in.
    """

    # Number of consecutive MaxPool1d(2) applications in forward(); each one
    # roughly halves the sequence length.
    _POOL_REPEATS = 7

    def __init__(self):
        """Create the convolution, activation, pooling and linear layers."""
        super().__init__()
        self.layer1 = Conv1d(
            in_channels=1,
            out_channels=5,
            kernel_size=5,
            stride=2,
            dtype=torch.float64,
        )
        self.act1 = ReLU()
        # A single stateless pooling module, reused for every pooling step.
        self.pool1 = MaxPool1d(2)
        self.layer2 = Conv1d(
            in_channels=5,
            out_channels=1,
            kernel_size=2,
            dtype=torch.float64,
        )
        self.fcl1 = Linear(1350, 16, dtype=torch.float64)

    def forward(self, x):
        """Run the network on ``x`` and return the 16-value output tensor.

        The intermediate result of each stage is printed for debugging.
        """
        print("raw", x)
        x = self.layer1(x)
        print("conv1d 1", x)
        x = self.act1(x)
        print("relu", x)
        x = self.layer2(x)
        print("conv1d 2", x)
        # Repeated down-sampling shrinks the sequence to the 1350 elements
        # that the linear layer expects.
        for _ in range(self._POOL_REPEATS):
            x = self.pool1(x)
        print("pools", x)
        x = self.fcl1(x)
        print("linear", x)
        return x
output
tensor(False)
raw tensor([[9.0616e+227, 2.4353e-152, 1.0294e-71, ..., 0.0000e+00,0.0000e+00, 0.0000e+00]], dtype=torch.float64)
conv1d 1 tensor([[ -inf, -inf, -inf, ..., -0.2516, -0.2516, -0.2516],[ inf, inf, inf, ..., 0.3377, 0.3377, 0.3377],[ -inf, -inf, -inf, ..., 0.4285, 0.4285, 0.4285],[ -inf, -inf, -inf, ..., -0.1230, -0.1230, -0.1230],[ inf, inf, inf, ..., 0.3793, 0.3793, 0.3793]],dtype=torch.float64, grad_fn=<SqueezeBackward1>)
relu tensor([[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],[ inf, inf, inf, ..., 0.3377, 0.3377, 0.3377],[0.0000, 0.0000, 0.0000, ..., 0.4285, 0.4285, 0.4285],[0.0000, 0.0000, 0.0000, ..., 0.0000, 0.0000, 0.0000],[ inf, inf, inf, ..., 0.3793, 0.3793, 0.3793]],dtype=torch.float64, grad_fn=<ReluBackward0>)
conv1d 2 tensor([[ -inf, -inf, -inf, ..., -5.4167e+265,-5.4167e+265, -5.4167e+265]], dtype=torch.float64,grad_fn=<SqueezeBackward1>)
pools tensor([[ -inf, -5.4167e+265, -5.4167e+265, ..., -5.4167e+265,-5.4167e+265, -5.4167e+265]], dtype=torch.float64,grad_fn=<SqueezeBackward1>)
linear tensor([[inf, inf, -inf, -inf, -inf, inf, inf, inf, inf, inf, inf, -inf, inf, inf, -inf, -inf]],dtype=torch.float64, grad_fn=<AddmmBackward0>)
tensor([[inf, inf, -inf, -inf, -inf, inf, inf, inf, inf, inf, inf, -inf, inf, inf, -inf, -inf]],dtype=torch.float64, grad_fn=<AddmmBackward0>)
epoch: 1 loss: nan
Thanks for helping