I am making a DataLoader
from DataSet
in PyTorch
.
Start from loading the DataFrame
with all dtype as an np.float64
result = pd.read_csv('dummy.csv', header=0, dtype=DTYPE_CLEANED_DF)
Here is my dataset classes.
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):def __init__(self, result):headers = list(result)headers.remove('classes')self.x_data = result[headers]self.y_data = result['classes']self.len = self.x_data.shape[0]def __getitem__(self, index):x = torch.tensor(self.x_data.iloc[index].values, dtype=torch.float)y = torch.tensor(self.y_data.iloc[index], dtype=torch.float)return (x, y)def __len__(self):return self.len
Prepare the train_loader and test_loader
train_size = int(0.5 * len(full_dataset))
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True, num_workers=1)
test_loader = DataLoader(dataset=train_dataset)
Here is my csv
file
When I try to iterate over the train_loader
. It raises the error
for i , (data, target) in enumerate(train_loader):print(i)TypeError Traceback (most recent call last)
<ipython-input-32-0b4921c3fe8c> in <module>
----> 1 for i , (data, target) in enumerate(train_loader):2 print(i)/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)635 self.reorder_dict[idx] = batch636 continue
--> 637 return self._process_next_batch(batch)638 639 next = __next__ # Python 2 compatibility/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_next_batch(self, batch)656 self._put_indices()657 if isinstance(batch, ExceptionWrapper):
--> 658 raise batch.exc_type(batch.exc_msg)659 return batch660 TypeError: Traceback (most recent call last):File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in _worker_loopsamples = collate_fn([dataset[i] for i in batch_indices])File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 138, in <listcomp>samples = collate_fn([dataset[i] for i in batch_indices])File "/opt/conda/lib/python3.6/site-packages/torch/utils/data/dataset.py", line 103, in __getitem__return self.dataset[self.indices[idx]]File "<ipython-input-27-107e03bc3c6a>", line 12, in __getitem__x = torch.tensor(self.x_data.iloc[index].values, dtype=torch.float)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py", line 1478, in __getitem__return self._getitem_axis(maybe_callable, axis=axis)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py", line 2091, in _getitem_axisreturn self._get_list_axis(key, axis=axis)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexing.py", line 2070, in _get_list_axisreturn self.obj._take(key, axis=axis)File "/opt/conda/lib/python3.6/site-packages/pandas/core/generic.py", line 2789, in _takeverify=True)File "/opt/conda/lib/python3.6/site-packages/pandas/core/internals.py", line 4537, in takenew_labels = self.axes[axis].take(indexer)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 2195, in takereturn self._shallow_copy(taken)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/range.py", line 267, in _shallow_copyreturn self._int64index._shallow_copy(values, **kwargs)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/numeric.py", line 68, in _shallow_copyreturn self._shallow_copy_with_infer(values=values, **kwargs)File "/opt/conda/lib/python3.6/site-packages/pandas/core/indexes/base.py", line 538, in _shallow_copy_with_inferif not len(values) and 'dtype' not in kwargs:
TypeError: object of type 'numpy.int64' has no len()
Related issues:
https://github.com/pytorch/pytorch/issues/10165
https://github.com/pytorch/pytorch/pull/9237
https://github.com/pandas-dev/pandas/issues/21946
Questions:
How to workaround pandas
issue here?