Oneflow-Inc · ShawnXuan · Sep 23, 2024 · ShawnXuan · Sep 23, 2024
diff --git a/cv/classification/configs/default_settings.yaml b/cv/classification/configs/default_settings.yaml
@@ -1,7 +1,7 @@
 DATA:
   BATCH_SIZE: 32
   DATASET: imagenet
-  DATA_PATH: /data/dataset/ImageNet/extract
+  DATA_PATH: /data0/datasets/ImageNet/extract
   IMG_SIZE: 224
   INTERPOLATION: bicubic
   ZIP_MODE: False
@@ -49,4 +49,4 @@ SAVE_FREQ: 1
 PRINT_FREQ: 50
 SEED: 42
 EVAL_MODE: True
-THROUGHPUT_MODE: False
+THROUGHPUT_MODE: False
diff --git a/cv/classification/configs/resnest_default_settings.yaml b/cv/classification/configs/resnest_default_settings.yaml
@@ -1,7 +1,7 @@
 DATA:
   BATCH_SIZE: 32
   DATASET: imagenet
-  DATA_PATH: /data/dataset/ImageNet/extract
+  DATA_PATH: /data0/datasets/ImageNet/extract
   IMG_SIZE: 256
   INTERPOLATION: bicubic
   ZIP_MODE: False

diff --git a/cv/classification/main.py b/cv/classification/main.py
@@ -8,6 +8,7 @@
 import datetime
 import numpy as np
 import oneflow as flow
+import oneflow_npu
 import oneflow.backends.cudnn as cudnn
 
 from flowvision.loss.cross_entropy import (
@@ -141,7 +142,8 @@ def main(config):
 
     logger.info(f"Creating model:{config.MODEL.ARCH}")
     model = build_model(config)
-    model.cuda()
+    # Replaces the former model.cuda() call: the model is now placed on the "npu" device.
+    model.to("npu")
 
     optimizer = build_optimizer(config, model)
     model = flow.nn.parallel.DistributedDataParallel(model, broadcast_buffers=False, use_bucket=False)
@@ -255,8 +257,8 @@ def train_one_epoch(
     start = time.time()
     end = time.time()
     for idx, (samples, targets) in enumerate(data_loader):
-        samples = samples.cuda()
-        targets = targets.cuda()
+        samples = samples.to("npu")
+        targets = targets.to("npu")
 
         if mixup_fn is not None:
             samples, targets = mixup_fn(samples, targets)
@@ -324,15 +326,15 @@ def validate(config, data_loader, model):
 
     end = time.time()
     for idx, (images, target) in enumerate(data_loader):
-        images = images.cuda()
-        target = target.cuda()
+        images = images.to("npu")
+        target = target.to("npu")
 
         # compute output
         output = model(images)
 
         # measure accuracy and record loss
         loss = criterion(output, target)
-        acc1, acc5 = accuracy(output, target, topk=(1, 5))
+        acc1, acc5 = accuracy(output.cpu(), target.cpu(), topk=(1, 5))
 
         acc1 = reduce_tensor(acc1)
         acc5 = reduce_tensor(acc5)
@@ -370,18 +372,20 @@ def throughput(data_loader, model, logger):
     model.eval()
 
     for idx, (images, _) in enumerate(data_loader):
-        images = images.cuda()
+        images = images.to("npu")
         batch_size = images.shape[0]
         for i in range(50):
             model(images)
-        flow.cuda.synchronize()
+        if flow.cuda.is_available():
+            flow.cuda.synchronize()
         # TODO: add flow.cuda.synchronize()
         logger.info(f"throughput averaged with 30 times")
         tic1 = time.time()
         for i in range(30):
             model(images)
 
-        flow.cuda.synchronize()
+        if flow.cuda.is_available():
+            flow.cuda.synchronize()
         tic2 = time.time()
         logger.info(
             f"batch_size {batch_size} throughput {30 * batch_size / (tic2 - tic1)}"

diff --git a/cv/classification/requirements.txt b/cv/classification/requirements.txt
@@ -3,4 +3,4 @@ Pillow==9.5.0
 PyYAML==6.0.1
 termcolor==1.1.0
 yacs==0.1.8
-opencv-python==4.4.0.46
+#opencv-python==4.4.0.46
diff --git a/cv/classification/resnet50/train.sh b/cv/classification/resnet50/train.sh
@@ -1,7 +1,7 @@
 export PYTHONPATH=$PWD:$PYTHONPATH
 set -aux
 
-GPU_NUMS=8
+GPU_NUMS=1
 PORT=12346
 MODEL_ARCH="resnet50"