feature: add GPU support for AlexNet

wz · wz · commit 6eb15ea7d4f2 · 2020-01-17T14:48:39.000+08:00
diff --git a/pytorch_learning/Test2_alexnet/model.py b/pytorch_learning/Test2_alexnet/model.py
@@ -8,23 +8,23 @@ def __init__(self, num_classes=1000, init_weights=False):
         self.features = nn.Sequential(
             nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2),  # input[3, 224, 224]  output[48, 55, 55]
             nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),  # output[48, 27, 27]
-            nn.Conv2d(48, 128, kernel_size=5, padding=2),  # output[128, 27, 27]
+            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[48, 27, 27]
+            nn.Conv2d(48, 128, kernel_size=5, padding=2),           # output[128, 27, 27]
             nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),  # output[128, 13, 13]
-            nn.Conv2d(128, 192, kernel_size=3, padding=1),  # output[192, 13, 13]
+            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 13, 13]
+            nn.Conv2d(128, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
             nn.ReLU(inplace=True),
-            nn.Conv2d(192, 192, kernel_size=3, padding=1),  # output[192, 13, 13]
+            nn.Conv2d(192, 192, kernel_size=3, padding=1),          # output[192, 13, 13]
             nn.ReLU(inplace=True),
-            nn.Conv2d(192, 128, kernel_size=3, padding=1),  # output[128, 13, 13]
+            nn.Conv2d(192, 128, kernel_size=3, padding=1),          # output[128, 13, 13]
             nn.ReLU(inplace=True),
-            nn.MaxPool2d(kernel_size=3, stride=2),  # output[128, 6, 6]
+            nn.MaxPool2d(kernel_size=3, stride=2),                  # output[128, 6, 6]
         )
         self.classifier = nn.Sequential(
-            nn.Dropout(p=0.2),
+            nn.Dropout(p=0.5),
             nn.Linear(128 * 6 * 6, 2048),
             nn.ReLU(inplace=True),
-            nn.Dropout(p=0.2),
+            nn.Dropout(p=0.5),
             nn.Linear(2048, 2048),
             nn.ReLU(inplace=True),
             nn.Linear(2048, num_classes),
diff --git a/pytorch_learning/Test2_alexnet/predict.py b/pytorch_learning/Test2_alexnet/predict.py
@@ -37,5 +37,5 @@
     output = torch.squeeze(model(img))
     predict = torch.softmax(output, dim=0)
     predict_cla = torch.argmax(predict).numpy()
-print(class_indict[str(predict_cla)])
+print(class_indict[str(predict_cla)], predict[predict_cla].item())
 plt.show()
diff --git a/pytorch_learning/Test2_alexnet/train.py b/pytorch_learning/Test2_alexnet/train.py
@@ -7,6 +7,9 @@
 from model import AlexNet
 import os
 import json
+import time
+
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
 data_transform = {
     "train": transforms.Compose([transforms.RandomResizedCrop(224),
@@ -31,17 +34,17 @@
 with open('class_indices.json', 'w') as json_file:
     json_file.write(json_str)
 
-batch_size = 32
+batch_size = 64
 train_loader = torch.utils.data.DataLoader(train_dataset,
                                            batch_size=batch_size, shuffle=True,
-                                           num_workers=0)
+                                           num_workers=16)
 
 validate_dataset = datasets.ImageFolder(root=image_path + "/val",
                                         transform=data_transform["val"])
 val_num = len(validate_dataset)
 validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                               batch_size=batch_size, shuffle=False,
-                                              num_workers=0)
+                                              num_workers=16)
 
 # test_data_iter = iter(validate_loader)
 # test_image, test_label = test_data_iter.next()
@@ -54,40 +57,53 @@
 
 
 net = AlexNet(num_classes=5, init_weights=True)
+
+net.to(device)
 loss_function = nn.CrossEntropyLoss()
 pata = list(net.parameters())
-optimizer = optim.Adam(net.parameters(), lr=0.0005)
+optimizer = optim.Adam(net.parameters(), lr=0.0002)
 
-for epoch in range(10):
+save_path = './AlexNet.pth'
+best_acc = 0.0
+for epoch in range(15):
     # train
     net.train()
     running_loss = 0.0
+    t1 = time.perf_counter()
     for step, data in enumerate(train_loader, start=0):
         images, labels = data
         # imshow(torchvision.utils.make_grid(images))
         # print(' '.join('%5s' % flower_set[labels[j]] for j in range(8)))
         optimizer.zero_grad()
-        outputs = net(images)
-        loss = loss_function(outputs, labels)
+        outputs = net(images.to(device))
+        loss = loss_function(outputs, labels.to(device))
         loss.backward()
         optimizer.step()
 
         # print statistics
         running_loss += loss.item()
+        # print train process
+        rate = (step + 1) / len(train_loader)
+        a = "*" * int(rate * 50)
+        b = "." * int((1 - rate) * 50)
+        print("\rtrain loss: {:^3.0f}%[{}->{}]{:.3f}".format(int(rate * 100), a, b, loss), end="")
+    print()
+    print(time.perf_counter()-t1)
 
     # validate
     net.eval()
     acc = 0.0  # accumulate accurate number / epoch
     with torch.no_grad():
         for data_test in validate_loader:
             test_images, test_labels = data_test
-            outputs = net(test_images)
+            outputs = net(test_images.to(device))
             predict_y = torch.max(outputs, dim=1)[1]
-            acc += (predict_y == test_labels).sum().item()
+            acc += (predict_y == test_labels.to(device)).sum().item()
+        accurate_test = acc / val_num
+        if accurate_test > best_acc:
+            best_acc = accurate_test
+            torch.save(net.state_dict(), save_path)
         print('[epoch %d] train_loss: %.3f  test_accuracy: %.3f' %
               (epoch + 1, running_loss / step, acc / val_num))
 
-
 print('Finished Training')
-save_path = './AlexNet.pth'
-torch.save(net.state_dict(), save_path)
diff --git a/summary_problem.md b/summary_problem.md
@@ -1,3 +1,7 @@
+## Tensorflow2.1 GPU安装与Pytorch1.3 GPU安装
+参考我之前写的博文：[Centos7 安装Tensorflow2.1 GPU以及Pytorch1.3 GPU（CUDA10.1）](https://blog.csdn.net/qq_37541097/article/details/103933366)
+
+
 ## keras functional api训练的模型权重与subclassed训练的模型权重能否混用 [tensorflow2.0.0]
 强烈不建议混用，即使两个模型的名称结构完全一致也不要混用，里面有坑，用什么方法训练的模型就载入相应的模型权重
 
@@ -16,6 +20,9 @@ model.build((batch_size, height, width, channel))
 * 安装graphviz，并添加相关环境变量  
 参考连接：https://github.com/XifengGuo/CapsNet-Keras/issues/7
 
-## 为什么每计算一个batch，就需要调用一次optimizer.zero_grad()    
+## 为什么每计算一个batch，就需要调用一次optimizer.zero_grad() [Pytorch1.3]   
 如果不清除历史梯度，就会对计算的历史梯度进行累加（通过这个特性你能够变相实现一个很大batch数值的训练）   
-参考链接：https://www.zhihu.com/question/303070254    
+参考链接：https://www.zhihu.com/question/303070254    
+
+## ImportError: cannot import name 'PILLOW_VERSION' [Pytorch1.3]  
+该错误由Pillow版本过高导致（Pillow 7.0.0起移除了`PILLOW_VERSION`常量），安装版本号小于7.0.0的Pillow即可，例如：`pip install "pillow<7.0.0"`