I am implementing an image classifier on the Oxford Pet dataset using the pre-trained ResNet18 CNN. The dataset consists of 37 categories with ~200 images in each of them.
Rather than using the output of the CNN's final fc layer to make predictions, I want to use the CNN as a feature extractor to classify the pets.
For each image, I'd like to grab the features from the last hidden layer (which should be the layer just before the 1000-dimensional output layer). My model uses ReLU activations, so I should grab the output just after the ReLU (so all values will be non-negative).
Here is my code (following the PyTorch transfer learning tutorial):
Loading the data:
# Per-channel normalization applied to every image after it is converted to a
# tensor (the values look like the usual ImageNet statistics -- confirm).
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# One ImageFolder dataset per split. Training images are randomly cropped and
# flipped; test images get a deterministic resize followed by a center crop.
image_datasets = {
    "train": datasets.ImageFolder(
        'images_new/train',
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
    "test": datasets.ImageFolder(
        'images_new/test',
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ]),
    ),
}

# Both splits share the same loader settings (batches of 4, shuffled).
dataloaders = {
    split: torch.utils.data.DataLoader(
        dataset,
        batch_size=4,
        shuffle=True,
        num_workers=4,
        pin_memory=True,
    )
    for split, dataset in image_datasets.items()
}

# Number of images per split; used to turn running sums into per-epoch averages.
dataset_sizes = {
    split: len(dataset) for split, dataset in image_datasets.items()
}

# Class names as discovered by ImageFolder for the training split.
train_class_names = image_datasets['train'].classes

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
Training function:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
    """Train ``model`` and return it loaded with its best test-phase weights.

    Every epoch runs a 'train' phase followed by a 'test' phase over the
    module-level ``dataloaders``. Gradients are computed and the optimizer is
    stepped only during the 'train' phase. After each 'test' phase the
    accuracy is compared with the best seen so far, and a copy of the model's
    ``state_dict`` is kept whenever it improves.

    Args:
        model: Network to train; called as ``model(inputs)``.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` drive the update.
        scheduler: Learning-rate scheduler; stepped once per epoch, after the
            training phase has finished.
        num_epochs: Number of epochs to run.

    Returns:
        The same ``model`` object, with the best recorded weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            is_train = phase == 'train'
            if is_train:
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # statistics: loss.item() is multiplied by the batch size so
                # the final division by the dataset size gives a per-sample
                # average.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            # Step the scheduler only after this epoch's optimizer updates.
            # Stepping it before the first optimizer.step() skips the first
            # value of the learning-rate schedule.
            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model
Set up the model, the cross-entropy loss, the SGD optimizer and the learning-rate scheduler, then train:
# Start from a pre-trained ResNet-18 and replace its final fully connected
# layer with one that has an output per training class.
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
print("number of features: ", num_ftrs)
model_ft.fc = nn.Linear(
    in_features=num_ftrs,
    out_features=len(train_class_names),
)
model_ft = model_ft.to(device)

criterion = nn.CrossEntropyLoss()

# Observe that all parameters are being optimized
optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer_ft,
    step_size=7,
    gamma=0.1,
)

model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=24,
)
Now, how do I get a feature vector from the last hidden layer for each of my images? I know I have to freeze the previous layers so that gradients aren't computed for them, but I'm having trouble extracting the feature vectors.
My ultimate goal is to use those feature vectors to train a linear classifier, such as a Ridge classifier or something similar.
Thanks!