"""
This file has no formal copyright legend; however, it is functional in nature
only and is not incorporated into the studio-go-runner source code, and as a
result it is assumed to be copyright of the authors detailed below.

Optional: Data Parallelism
==========================
**Authors**: `Sung Kim <https://github.com/hunkim>`_ and `Jenny Kang <https://github.com/jennykang>`_

In this tutorial, we will learn how to use multiple GPUs using ``DataParallel``.

It's very easy to use GPUs with PyTorch. You can put the model on a GPU:

.. code:: python

    device = torch.device("cuda:0")
    model.to(device)

Then, you can copy all your tensors to the GPU:

.. code:: python

    mytensor = my_tensor.to(device)

Please note that just calling ``my_tensor.to(device)`` returns a new copy of
``my_tensor`` on the GPU instead of rewriting ``my_tensor``. You need to assign
it to a new tensor and use that tensor on the GPU.

It's natural to execute your forward and backward propagations on multiple
GPUs. However, PyTorch will only use one GPU by default. You can easily run
your operations on multiple GPUs by making your model run in parallel with
``DataParallel``:

.. code:: python

    model = nn.DataParallel(model)

That's the core behind this tutorial. We will explore it in more detail below.
"""


######################################################################
# Imports and parameters
# ----------------------
#
# Import PyTorch modules and define parameters.
#

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# Parameters and DataLoaders
input_size = 5
output_size = 2

batch_size = 30
data_size = 100


######################################################################
# Device
# ------
#
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

######################################################################
# Dummy DataSet
# -------------
#
# Make a dummy (random) dataset. You just need to implement the
# ``__getitem__`` method.
#

class RandomDataset(Dataset):

    def __init__(self, size, length):
        self.len = length
        self.data = torch.randn(length, size)

    def __getitem__(self, index):
        return self.data[index]

    def __len__(self):
        return self.len

rand_loader = DataLoader(dataset=RandomDataset(input_size, data_size),
                         batch_size=batch_size, shuffle=True)
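
######################################################################
# As a quick sanity check, we can pull one batch from the loader to see
# the shape it produces: with ``batch_size = 30`` and ``input_size = 5``,
# the first batch will have size ``[30, 5]``. Iterating again later
# restarts the loader from the beginning.

sample_batch = next(iter(rand_loader))
print("Sample batch size:", sample_batch.size())  # torch.Size([30, 5])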


######################################################################
# Simple Model
# ------------
#
# For the demo, our model just takes an input, performs a linear operation,
# and gives an output. However, you can use ``DataParallel`` on any model
# (CNN, RNN, Capsule Net etc.)
#
# We've placed a print statement inside the model to monitor the size of
# the input and output tensors.
# Please pay attention to what is printed at batch rank 0.
#

class Model(nn.Module):
    # Our model

    def __init__(self, input_size, output_size):
        super(Model, self).__init__()
        self.fc = nn.Linear(input_size, output_size)

    def forward(self, input):
        output = self.fc(input)
        print("\tIn Model: input size", input.size(),
              "output size", output.size())

        return output


######################################################################
# Create Model and DataParallel
# -----------------------------
#
# This is the core part of the tutorial. First, we need to make a model
# instance and check whether we have multiple GPUs. If we do, we can wrap
# our model with ``nn.DataParallel``. Then we can put our model on the GPUs
# with ``model.to(device)``.
#

model = Model(input_size, output_size)
if torch.cuda.device_count() > 1:
    print("Let's use", torch.cuda.device_count(), "GPUs!")
    # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    model = nn.DataParallel(model)

model.to(device)
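

######################################################################
# By default ``nn.DataParallel`` replicates the model across every visible
# GPU. If you only want a subset of them, it also accepts an optional
# ``device_ids`` argument; a minimal sketch, assuming at least GPUs 0 and 1
# exist:
#
# .. code:: python
#
#     # Replicate onto GPUs 0 and 1 only; outputs are gathered on
#     # device_ids[0] by default.
#     model = nn.DataParallel(model, device_ids=[0, 1])
#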


######################################################################
# Run the Model
# -------------
#
# Now we can see the sizes of the input and output tensors.
#

for data in rand_loader:
    input = data.to(device)
    output = model(input)
    print("Outside: input size", input.size(),
          "output_size", output.size())


######################################################################
# Results
# -------
#
# If you have no GPU or one GPU, then when we batch 30 inputs and 30
# outputs, the model gets 30 and outputs 30, as expected. But if you have
# multiple GPUs, you will see results like the following.
#
# 2 GPUs
# ~~~~~~
#
# If you have 2 GPUs, you will see:
#
# .. code:: bash
#
#     # on 2 GPUs
#     Let's use 2 GPUs!
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#         In Model: input size torch.Size([15, 5]) output size torch.Size([15, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([5, 5]) output size torch.Size([5, 2])
#         In Model: input size torch.Size([5, 5]) output size torch.Size([5, 2])
#     Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])
#
# 3 GPUs
# ~~~~~~
#
# If you have 3 GPUs, you will see:
#
# .. code:: bash
#
#     Let's use 3 GPUs!
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#         In Model: input size torch.Size([10, 5]) output size torch.Size([10, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#     Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])
#
# 8 GPUs
# ~~~~~~
#
# If you have 8 GPUs, you will see:
#
# .. code:: bash
#
#     Let's use 8 GPUs!
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([4, 5]) output size torch.Size([4, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#     Outside: input size torch.Size([30, 5]) output_size torch.Size([30, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#         In Model: input size torch.Size([2, 5]) output size torch.Size([2, 2])
#     Outside: input size torch.Size([10, 5]) output_size torch.Size([10, 2])
#
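

######################################################################
# The sizes above come from ``DataParallel`` scattering each batch along
# ``dim = 0`` into nearly equal chunks, one per GPU, much like
# ``torch.chunk`` does: 30 rows over 3 GPUs gives 10 + 10 + 10, while the
# final batch of 10 gives 4 + 4 + 2. A minimal sketch of that arithmetic:
#
# .. code:: python
#
#     batch = torch.randn(10, 5)
#     for part in batch.chunk(3, dim=0):
#         print(part.size())
#     # torch.Size([4, 5])
#     # torch.Size([4, 5])
#     # torch.Size([2, 5])
#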


######################################################################
# Summary
# -------
#
# DataParallel splits your data automatically and sends job orders to
# multiple models on several GPUs. After each model finishes its job,
# DataParallel collects and merges the results before returning them to you.
#
# For more information, please check out
# http://pytorch.org/tutorials/beginner/former_torchies/parallelism_tutorial.html.
#
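
######################################################################
# One caveat worth knowing: ``nn.DataParallel`` wraps the model, so the
# original module sits behind a ``.module`` attribute (and its parameters
# are saved under ``module.``-prefixed keys in the state dict). A minimal
# sketch of reaching the underlying model whether or not it was wrapped:
#
# .. code:: python
#
#     # Unwrap if DataParallel was applied, otherwise use the model as-is.
#     core = model.module if isinstance(model, nn.DataParallel) else model
#     print(core.fc.weight.size())  # torch.Size([2, 5])
#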