Merge pull request #9 from sony/feature/20180301-fp16-cuda
Type configuration and APIs
KazukiYoshiyama-sony authored Mar 26, 2018
2 parents b59a066 + 1c26415 commit c0baae3
Showing 41 changed files with 174 additions and 169 deletions.
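Every file in this diff receives the same migration: the context factory moves from `nnabla.contrib.context.extension_context` to `nnabla.ext_utils.get_extension_context`, the extension name `cuda.cudnn` shortens to `cudnn`, device IDs become strings, and a new `type_config` argument (exposed to the scripts as a `--type-config`/`-t` flag defaulting to `float`) selects FP32 or FP16 computation. A minimal before/after sketch, assuming nnabla and its CUDA extension package are installed:

```python
import nnabla as nn

# Before this PR (no notion of a compute type):
# from nnabla.contrib.context import extension_context
# ctx = extension_context('cuda.cudnn', device_id=0)

# After this PR: the new factory takes type_config ('float' or 'half')
# and the cuDNN extension is addressed as 'cudnn'.
from nnabla.ext_utils import get_extension_context

ctx = get_extension_context('cudnn', device_id='0', type_config='half')
nn.set_default_context(ctx)  # subsequent graphs compute in FP16 on GPU 0
```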
2 changes: 1 addition & 1 deletion capsule_net/README.md
@@ -16,7 +16,7 @@ There might be some differences between this implementation and the original paper
To train a CapsNet, run:

```shell
-python train.py [-c cuda.cudnn] [-d <device_id>] [--disable-grad-dynamic-routing]
+python train.py [-c cudnn] [-d <device_id>] [--disable-grad-dynamic-routing]
```

You can see the list of options by running the command with the `-h` option.
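Since this diff also adds a `--type-config`/`-t` option to train.py (default `float`), a half-precision GPU run would look like, for example:

```shell
# FP16 training on GPU 0; -t half maps to type_config='half'
python train.py -c cudnn -d 0 -t half
```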
16 changes: 8 additions & 8 deletions capsule_net/reconstruct_tweaked_capsules.py
@@ -35,9 +35,11 @@ def get_args(monitor_path='tmp.monitor.capsnet'):
                        type=str, default=monitor_path,
                        help='Path monitoring logs saved.')
    parser.add_argument('--context', '-c', type=str,
-                       default=None, help="Extension modules. ex) 'cpu', 'cuda.cudnn'.")
-    parser.add_argument("--device-id", "-d", type=int, default=0,
+                       default=None, help="Extension modules. ex) 'cpu', 'cudnn'.")
+    parser.add_argument("--device-id", "-d", type=str, default='0',
                        help='Device ID the training run on. This is only valid if you specify `-c cuda.cudnn`.')
+    parser.add_argument("--type-config", "-t", type=str, default='float',
+                        help='Type of computation. e.g. "float", "half".')
    args = parser.parse_args()
    assert os.path.isdir(
        args.monitor_path), "Run train.py before running this."
@@ -97,12 +99,10 @@ def main():
    args = get_args()

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Load parameters
16 changes: 8 additions & 8 deletions capsule_net/train.py
@@ -55,9 +55,11 @@ def get_args(monitor_path='tmp.monitor.capsnet'):
    parser.add_argument("--max-epochs", "-e", type=int, default=50,
                        help='Max epochs of training.')
    parser.add_argument('--context', '-c', type=str,
-                       default=None, help="Extension modules. ex) 'cpu', 'cuda.cudnn'.")
-    parser.add_argument("--device-id", "-d", type=int, default=0,
+                       default=None, help="Extension modules. ex) 'cpu', 'cudnn'.")
+    parser.add_argument("--device-id", "-d", type=str, default='0',
                        help='Device ID the training run on. This is only valid if you specify `-c cuda.cudnn`.')
+    parser.add_argument("--type-config", "-t", type=str, default='float',
+                        help='Type of computation. e.g. "float", "half".')
    parser.add_argument("--disable-grad-dynamic-routing", "-g",
                        dest='grad_dynamic_routing',
                        action='store_false', default=True,
@@ -78,12 +80,10 @@ def train():
    seed(0)

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # TRAIN
7 changes: 5 additions & 2 deletions cifar10-100-collection/args.py
@@ -31,13 +31,16 @@ def get_args(monitor_path='tmp.monitor', max_iter=40000, model_save_path='tmp.mo
    parser.add_argument("--val-iter", "-j", type=int, default=100)
    parser.add_argument("--weight-decay", "-w",
                        type=float, default=weight_decay)
-    parser.add_argument("--device-id", "-d", type=int, default=0)
+    parser.add_argument("--device-id", "-d", type=str, default='0',
+                        help='Device ID the training run on. This is only valid if you specify `-c cudnn`.')
+    parser.add_argument("--type-config", "-t", type=str, default='float',
+                        help='Type of computation. e.g. "float", "half".')
    parser.add_argument("--warmup-epoch", "-e", type=int, default=warmup_epoch)
    parser.add_argument("--model-save-interval", "-s", type=int, default=1000)
    parser.add_argument("--model-save-path", "-o",
                        type=str, default=model_save_path)
    parser.add_argument('--context', '-c', type=str,
-                       default=None, help="Extension path. ex) cpu, cuda.cudnn.")
+                       default=None, help="Extension path. ex) cpu, cudnn.")
    parser.add_argument("--net", "-n", type=str,
                        default='cifar10_resnet23',
                        help="Neural network architecure type (used only in classification.py).\n"
7 changes: 4 additions & 3 deletions cifar10-100-collection/classification.py
@@ -19,7 +19,7 @@
from cifar10_data import data_iterator_cifar10
from cifar100_data import data_iterator_cifar100
import nnabla as nn
-from nnabla.contrib.context import extension_context
+from nnabla.ext_utils import get_extension_context
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
@@ -55,7 +55,8 @@ def train():
    n_train_samples = 50000
    bs_valid = args.batch_size
    extension_module = args.context
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    ctx = get_extension_context(
+        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)
    if args.net == "cifar10_resnet23":
        prediction = functools.partial(
@@ -137,7 +138,7 @@ def train():
    """
    Call this script with `mpirun` or `mpiexec`
-    $ mpirun -n 4 python multi_device_multi_process.py --context "cuda.cudnn" -bs 64
+    $ mpirun -n 4 python multi_device_multi_process.py --context "cudnn" -bs 64
    """
    train()
2 changes: 1 addition & 1 deletion cifar10-100-collection/models.py
@@ -20,7 +20,7 @@
from cifar100_data import data_iterator_cifar100
import nnabla as nn
import nnabla.communicators as C
-from nnabla.contrib.context import extension_context
+from nnabla.ext_utils import get_extension_context
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
4 changes: 2 additions & 2 deletions distributed/cifar10-100/README.md
@@ -17,7 +17,7 @@ NOTE that if you would like to run this example, please follow the build instructions
When you run the script as follows,

```
-mpirun -n 4 python multi_device_multi_process_classification.py --context "cuda.cudnn" -b 64
+mpirun -n 4 python multi_device_multi_process_classification.py --context "cudnn" -b 64
```

@@ -32,7 +32,7 @@ NOTE that if you would like to run this example, please follow the build instructions
When you run the script as follows,

```
-mpirun --hostfile hostfile python multi_device_multi_process_classification.py --context "cuda.cudnn" -b 64
+mpirun --hostfile hostfile python multi_device_multi_process_classification.py --context "cudnn" -b 64
```

7 changes: 5 additions & 2 deletions distributed/cifar10-100/args.py
@@ -31,14 +31,17 @@ def get_args(monitor_path='tmp.monitor', max_iter=234300, model_save_path='tmp.m
    parser.add_argument("--val-iter", "-j", type=int, default=100)
    parser.add_argument("--weight-decay", "-w",
                        type=float, default=weight_decay)
-    parser.add_argument("--device-id", "-d", type=int, default=0)
+    parser.add_argument("--device-id", "-d", type=str, default='0',
+                        help='Device ID the training run on. This is only valid if you specify `-c cudnn`.')
+    parser.add_argument("--type-config", "-t", type=str, default='float',
+                        help='Type of computation. e.g. "float", "half".')
    parser.add_argument("--n-devices", "-n", type=int, default=n_devices)
    parser.add_argument("--warmup-epoch", "-e", type=int, default=warmup_epoch)
    parser.add_argument("--model-save-interval", "-s", type=int, default=1000)
    parser.add_argument("--model-save-path", "-o",
                        type=str, default=model_save_path)
    parser.add_argument('--context', '-c', type=str,
-                       default=None, help="Extension path. ex) cpu, cuda.cudnn.")
+                       default=None, help="Extension path. ex) cpu, cudnn.")
    parser.add_argument("--net", type=str,
                        default='cifar10_resnet23',
                        help="Neural network architecure type (used only in classification.py).\n"
2 changes: 1 addition & 1 deletion distributed/cifar10-100/models.py
@@ -20,7 +20,7 @@
from cifar100_data import data_iterator_cifar100
import nnabla as nn
import nnabla.communicators as C
-from nnabla.contrib.context import extension_context
+from nnabla.ext_utils import get_extension_context
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
distributed/cifar10-100/multi_device_multi_process_classification.py
@@ -22,7 +22,7 @@
from nnabla.utils.data_iterator import data_iterator
import nnabla as nn
import nnabla.communicators as C
-from nnabla.contrib.context import extension_context
+from nnabla.ext_utils import get_extension_context
import nnabla.functions as F
import nnabla.parametric_functions as PF
import nnabla.solvers as S
@@ -70,8 +70,8 @@ def train():
        data_iterator = data_iterator_cifar100

    # Create Communicator and Context
-    extension_module = "cuda.cudnn"
-    ctx = extension_context(extension_module)
+    extension_module = "cudnn"
+    ctx = get_extension_context(extension_module)
    comm = C.MultiProcessDataParalellCommunicator(ctx)
    comm.init()
    n_devices = comm.size
@@ -192,7 +192,7 @@ def train():
    """
    Call this script with `mpirun` or `mpiexec`
-    $ mpirun -n 4 python multi_device_multi_process.py --context "cuda.cudnn" -bs 64
+    $ mpirun -n 4 python multi_device_multi_process.py --context "cudnn" -bs 64
    """
    train()
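Condensed from the hunks above, the per-process setup in this script now reads roughly as follows; the rank-to-device binding shown here is an assumption based on the surrounding `n_devices = comm.size` context rather than lines visible in this diff:

```python
import nnabla as nn
import nnabla.communicators as C
from nnabla.ext_utils import get_extension_context

# One MPI process per GPU, launched via e.g. `mpirun -n 4 python ...`.
ctx = get_extension_context("cudnn")
comm = C.MultiProcessDataParalellCommunicator(ctx)  # sic: nnabla's spelling
comm.init()
ctx.device_id = str(comm.rank)  # assumed: bind each rank to its own GPU
nn.set_default_context(ctx)
```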
2 changes: 1 addition & 1 deletion imagenet-classification/README.md
@@ -16,7 +16,7 @@ In this example, "Residual Neural Network" (also called "ResNet") is trained on
The following line executes the Tiny ImageNet training (with the settings we recommend you try first; it requires nearly 6GB of memory available on the CUDA device. See more options with the `-h` option).

```
-python classification.py -c cuda.cudnn -a4 -b64 -L34
+python classification.py -c cudnn -a4 -b64 -L34
```

Tiny ImageNet consists of 200 categories, and each category has 500 images of size 64x64 in the training set.
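Given the `--type-config`/`-t` option added to this example's args.py below, a half-precision variant of the same run would plausibly be:

```
python classification.py -c cudnn -t half -a4 -b64 -L34
```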
8 changes: 5 additions & 3 deletions imagenet-classification/args.py
@@ -46,15 +46,17 @@ def get_args(monitor_path='tmp.monitor.imagenet', max_iter=500000, model_save_pa
    parser.add_argument("--weight-decay", "-w",
                        type=float, default=weight_decay,
                        help='Weight decay factor of SGD update.')
-    parser.add_argument("--device-id", "-d", type=int, default=0,
-                        help='Device ID the training run on. This is only valid if you specify `-c cuda.cudnn`.')
+    parser.add_argument("--device-id", "-d", type=str, default='0',
+                        help='Device ID the training run on. This is only valid if you specify `-c cudnn`.')
+    parser.add_argument("--type-config", "-t", type=str, default='float',
+                        help='Type configuration.')
    parser.add_argument("--model-save-interval", "-s", type=int, default=1000,
                        help='The interval of saving model parameters.')
    parser.add_argument("--model-save-path", "-o",
                        type=str, default=model_save_path,
                        help='Path the model parameters saved.')
    parser.add_argument('--context', '-c', type=str,
-                       default=None, help="Extension module. 'cuda.cudnn' is highly.recommended.")
+                       default=None, help="Extension module. 'cudnn' is highly.recommended.")
    parser.add_argument("--num-layers", "-L", type=int,
                        choices=[18, 34, 50, 101, 152], default=34,
                        help='Number of layers of ResNet.')
5 changes: 3 additions & 2 deletions imagenet-classification/classification.py
@@ -86,12 +86,13 @@ def train():
    args = get_args()

    # Get context.
-    from nnabla.contrib.context import extension_context
+    from nnabla.ext_utils import get_extension_context
    extension_module = args.context
    if args.context is None:
        extension_module = 'cpu'
    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    ctx = get_extension_context(
+        extension_module, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Dataset
6 changes: 4 additions & 2 deletions mnist-collection/args.py
@@ -41,8 +41,10 @@ def get_args(monitor_path='tmp.monitor', max_iter=10000, model_save_path=None, l
    parser.add_argument("--weight-decay", "-w",
                        type=float, default=weight_decay,
                        help='Weight decay factor of SGD update.')
-    parser.add_argument("--device-id", "-d", type=int, default=0,
+    parser.add_argument("--device-id", "-d", type=str, default='0',
                        help='Device ID the training run on. This is only valid if you specify `-c cuda.cudnn`.')
+    parser.add_argument("--type-config", "-t", type=str, default='float',
+                        help='Type of computation. e.g. "float", "half".')
    parser.add_argument("--model-save-interval", "-s", type=int, default=1000,
                        help='The interval of saving model parameters.')
    parser.add_argument("--model-save-path", "-o",
@@ -52,7 +54,7 @@ def get_args(monitor_path='tmp.monitor', max_iter=10000, model_save_path=None, l
                        default='lenet',
                        help="Neural network architecure type (used only in classification*.py).\n classification.py: ('lenet'|'resnet'), classification_bnn.py: ('bincon'|'binnet'|'bwn'|'bwn'|'bincon_resnet'|'binnet_resnet'|'bwn_resnet')")
    parser.add_argument('--context', '-c', type=str,
-                       default=None, help="Extension modules. ex) 'cpu', 'cuda.cudnn'.")
+                       default='cpu', help="Extension modules. ex) 'cpu', 'cudnn'.")
    parser.add_argument('--augment-train', action='store_true',
                        default=False, help="Enable data augmentation of training data.")
    parser.add_argument('--augment-test', action='store_true',
15 changes: 9 additions & 6 deletions mnist-collection/classification.py
@@ -27,6 +27,8 @@
from args import get_args
from mnist_data import data_iterator_mnist

+import numpy as np
+

def categorical_error(pred, label):
    """
@@ -129,12 +131,10 @@ def train():
    seed(0)

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
@@ -186,6 +186,7 @@ def train():
            for j in range(args.val_iter):
                vimage.d, vlabel.d = vdata.next()
                vpred.forward(clear_buffer=True)
+                vpred.data.cast(np.float32, ctx)
                ve += categorical_error(vpred.d, vlabel.d)
            monitor_verr.add(i, ve / args.val_iter)
        if i % args.model_save_interval == 0:
@@ -198,6 +199,8 @@ def train():
        loss.backward(clear_buffer=True)
        solver.weight_decay(args.weight_decay)
        solver.update()
+        loss.data.cast(np.float32, ctx)
+        pred.data.cast(np.float32, ctx)
        e = categorical_error(pred.d, label.d)
        monitor_loss.add(i, loss.d.copy())
        monitor_err.add(i, e)
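The two hunks above are the FP16-specific part of this file: with `type_config='half'`, forward outputs are stored as float16, so the buffers are cast back to float32 in place before `.d` hands them to NumPy for loss and error bookkeeping. A self-contained sketch of the same idea, assuming a CUDA/cuDNN build of nnabla is available:

```python
import numpy as np
import nnabla as nn
import nnabla.functions as F
from nnabla.ext_utils import get_extension_context

ctx = get_extension_context('cudnn', type_config='half')
nn.set_default_context(ctx)

x = nn.Variable((2, 3))
y = F.tanh(x)
x.d = np.random.randn(2, 3)
y.forward()
y.data.cast(np.float32, ctx)  # in-place cast; without it y.d is float16
print(y.d.dtype)  # float32
```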
10 changes: 4 additions & 6 deletions mnist-collection/classification_bnn.py
@@ -244,12 +244,10 @@ def train():
    args = get_args(monitor_path='tmp.monitor.bnn')

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize DataIterator for MNIST.
10 changes: 4 additions & 6 deletions mnist-collection/dcgan.py
@@ -113,12 +113,10 @@ def train(args):
    """

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
10 changes: 4 additions & 6 deletions mnist-collection/siamese.py
@@ -95,12 +95,10 @@ def train(args):
    """

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Create CNN network for both training and testing.
10 changes: 4 additions & 6 deletions mnist-collection/vae.py
@@ -123,12 +123,10 @@ def main():
        model_save_path=None, learning_rate=3e-4, batch_size=100, weight_decay=0)

    # Get context.
-    from nnabla.contrib.context import extension_context
-    extension_module = args.context
-    if args.context is None:
-        extension_module = 'cpu'
-    logger.info("Running in %s" % extension_module)
-    ctx = extension_context(extension_module, device_id=args.device_id)
+    from nnabla.ext_utils import get_extension_context
+    logger.info("Running in %s" % args.context)
+    ctx = get_extension_context(
+        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)

    # Initialize data provider