Merge pull request #149 from Quantum-TII/devices

Improved memory errors and device switcher
qiboteam · Jun 26, 2020 · 60a1ac0 · 60a1ac0
2 parents b89170a + 5bf149f
commit 60a1ac0
Show file tree

Hide file tree

Showing 13 changed files with 234 additions and 114 deletions.
diff --git a/doc/source/examples.rst b/doc/source/examples.rst
@@ -125,13 +125,28 @@ one can use:
         # execute circuit on CPU with default initial state |000...0>.
         final_state = c()
 
-Alternatively, running the command ``CUDA_VISIBLE_DEVICES="-1"`` in a terminal
+or switch the default QIBO device using ``qibo.set_device`` as:
+
+.. code-block::  python
+
+    import qibo
+    qibo.set_device("/CPU:0")
+    final_state = c() # circuit will now be executed on CPU
+
+The syntax of device names follows the pattern ``'/{device type}:{device number}'``
+where device type can be CPU or GPU and the device number is an integer that
+distinguishes multiple devices of the same type starting from 0. For more details
+we refer to `Tensorflow's tutorial <https://www.tensorflow.org/guide/gpu#manual_device_placement>`_
+on manual device placement.
+Alternatively, running the command ``export CUDA_VISIBLE_DEVICES=""`` in a terminal
 hides GPUs from tensorflow. As a result, any program executed from the same
 terminal will run on CPU even if ``tf.device`` is not used.
 
 GPUs provide much faster execution compared to CPU but have limited memory.
 A standard 12-16GB GPU can simulate up to 30 qubits with single-precision
-or 29 qubits with double-precision when QIBO's default gates are used.
+or 29 qubits with double-precision when QIBO's default gates are used. If the
+device in use runs out of memory during circuit execution, an error will be
+raised prompting the user to switch the default device using ``qibo.set_device``.
 
 QIBO supports distributed circuit execution on multiple GPUs. This feature can
 be used as follows:

diff --git a/src/qibo/__init__.py b/src/qibo/__init__.py
@@ -1,5 +1,5 @@
 __version__ = "0.0.1b2"
-from qibo.config import set_precision, set_backend, matrices, K
+from qibo.config import set_precision, set_backend, set_device, matrices, K
 from qibo import callbacks
 from qibo import models
 from qibo import gates

diff --git a/src/qibo/base/circuit.py b/src/qibo/base/circuit.py
@@ -297,8 +297,8 @@ def _set_nqubits(self, gate: gates.Gate):
 
         Helper method for ``circuit.add(gate)``.
         """
-        if gate._nqubits is None:
-            gate.nqubits = self.nqubits
+        if gate._nqubits is None: # pragma: no cover
+            raise NotImplementedError
         elif gate.nqubits != self.nqubits:
             raise ValueError("Attempting to add gate with {} total qubits to "
                              "a circuit with {} qubits."
@@ -415,7 +415,7 @@ def summary(self) -> str:
         return "\n".join(logs)
 
     @property
-    def final_state(self):
+    def final_state(self): # pragma: no cover
         """Returns the final state after full simulation of the circuit.
 
         If the circuit is executed more than once, only the last final state
@@ -424,11 +424,11 @@ def final_state(self):
         raise NotImplementedError
 
     @abstractmethod
-    def execute(self, *args):
+    def execute(self, *args): # pragma: no cover
         """Executes the circuit. Exact implementation depends on the backend."""
         raise NotImplementedError
 
-    def __call__(self, *args):
+    def __call__(self, *args): # pragma: no cover
         """Equivalent to ``circuit.execute``."""
         return self.execute(*args)
 

diff --git a/src/qibo/base/gates.py b/src/qibo/base/gates.py
@@ -139,6 +139,16 @@ def nqubits(self, n: int):
                                "set to {}.".format(self._nqubits))
         self._nqubits = n
         self._nstates = 2**n
+        self._prepare()
+
+    def _prepare(self): # pragma: no cover
+        """Prepares the gate for application to state vectors.
+
+        Called automatically by the ``nqubits`` setter.
+        Calculates the ``matrix`` required to apply the gate to state vectors.
+        This is not necessarily the same as the unitary matrix of the gate.
+        """
+        raise NotImplementedError
 
     def commutes(self, gate: "Gate") -> bool:
         """Checks if two gates commute.
@@ -196,7 +206,7 @@ def decompose(self, *free) -> List["Gate"]:
         # original gate
         return [self.__class__(*self._init_args, **self._init_kwargs)]
 
-    def __call__(self, state, is_density_matrix):
+    def __call__(self, state, is_density_matrix): # pragma: no cover
         """Acts with the gate on a given state vector:
 
         Args:
@@ -308,7 +318,7 @@ def decompose(self, *free: int, use_toffolis: bool = True) -> List[Gate]:
 
             decomp_gates = [*part1, *part2]
 
-        else:
+        else: # pragma: no cover
             raise NotImplementedError("X decomposition is not implemented for "
                                       "zero free qubits.")
 

diff --git a/src/qibo/config.py b/src/qibo/config.py
@@ -10,7 +10,7 @@
 # Choose the least significant qubit
 LEAST_SIGNIFICANT_QUBIT = 0
 
-if LEAST_SIGNIFICANT_QUBIT != 0:
+if LEAST_SIGNIFICANT_QUBIT != 0: # pragma: no cover
     raise NotImplementedError("The least significant qubit should be 0.")
 
 # Load backend specifics
@@ -39,16 +39,22 @@
     # Gate backends
     BACKEND = {'GATES': 'custom', 'EINSUM': None}
 
-    # Set memory cut-off for using GPU when sampling
-    GPU_MEASUREMENT_CUTOFF = 1300000000
-
-    # Find available CPUs as they may be needed for sampling
-    _available_cpus = tf.config.list_logical_devices("CPU")
-    if _available_cpus:
-        CPU_NAME = _available_cpus[0].name
-    else:
-        CPU_NAME = None
-
+    # Set devices recognized by tensorflow
+    DEVICES = {
+        'CPU': tf.config.list_logical_devices("CPU"),
+        'GPU': tf.config.list_logical_devices("GPU")
+    }
+    # set default device to GPU if it exists
+    if DEVICES['GPU']: # pragma: no cover
+        DEVICES['DEFAULT'] = DEVICES['GPU'][0].name
+    elif DEVICES['CPU']:
+        DEVICES['DEFAULT'] = DEVICES['CPU'][0].name
+    else: # pragma: no cover
+        raise RuntimeError("Unable to find Tensorflow devices.")
+
+    # Define numpy and tensorflow matrices
+    # numpy matrices are exposed to user via ``from qibo import matrices``
+    # tensorflow matrices are used by native gates (``/tensorflow/gates.py``)
     from qibo.tensorflow import matrices as _matrices
     matrices = _matrices.NumpyMatrices()
     tfmatrices = _matrices.TensorflowMatrices()
@@ -96,5 +102,29 @@ def set_precision(dtype='double'):
         matrices.allocate_matrices()
         tfmatrices.allocate_matrices()
 
-else:
+
+    def set_device(device_name: str):
+        """Set default execution device.
+
+        Args:
+            device_name (str): Device name. Should follow the pattern
+                '/{device type}:{device number}' where device type is one of
+                CPU or GPU.
+        """
+        parts = device_name[1:].split(":")
+        if device_name[0] != "/" or len(parts) != 2:
+            raise ValueError("Device name should follow the pattern: "
+                             "/{device type}:{device number}.")
+        device_type, device_number = parts[0], int(parts[1])
+        if device_type not in {"CPU", "GPU"}:
+            raise ValueError(f"Unknown device type {device_type}.")
+        if device_number >= len(DEVICES[device_type]):
+            raise ValueError(f"Device {device_name} does not exist.")
+
+        DEVICES['DEFAULT'] = device_name
+        with tf.device(device_name):
+            tfmatrices.allocate_matrices()
+
+
+else: # pragma: no cover
     raise NotImplementedError("Only Tensorflow backend is implemented.")
diff --git a/src/qibo/tensorflow/cgates.py b/src/qibo/tensorflow/cgates.py
@@ -4,7 +4,7 @@
 import numpy as np
 import tensorflow as tf
 from qibo.base import gates as base_gates
-from qibo.config import BACKEND, DTYPES, GPU_MEASUREMENT_CUTOFF, CPU_NAME
+from qibo.config import BACKEND, DTYPES, DEVICES
 from qibo.tensorflow import custom_operators as op
 from typing import Dict, List, Optional, Sequence, Tuple
 
@@ -36,6 +36,15 @@ def construct_unitary(*args) -> tf.Tensor:
         """
         raise NotImplementedError
 
+    def _prepare(self):
+        """Prepares the gate for application to state vectors.
+
+        Called automatically by the ``nqubits`` setter.
+        Calculates the ``matrix`` required to apply the gate to state vectors.
+        This is not necessarily the same as the unitary matrix of the gate.
+        """
+        pass
+
     def __call__(self, state: tf.Tensor, is_density_matrix: bool = False
                  ) -> tf.Tensor:
         """Implements the `Gate` on a given state.
@@ -54,18 +63,7 @@ def __init__(self):
         super(MatrixGate, self).__init__()
         self.matrix = None
 
-    @base_gates.Gate.nqubits.setter
-    def nqubits(self, n: int):
-        base_gates.Gate.nqubits.fset(self, n) # pylint: disable=no-member
-        self._prepare()
-
-    def _prepare(self):
-        """Prepares the gate for application to state vectors.
-
-        Called automatically by the ``nqubits`` setter.
-        Calculates the ``matrix`` required to apply the gate to state vectors.
-        This is not necessarily the same as the unitary matrix of the gate.
-        """
+    def _prepare(self): # pragma: no cover
         raise NotImplementedError
 
     def __call__(self, state: tf.Tensor, is_density_matrix: bool = False
@@ -194,16 +192,16 @@ def __call__(self, state: tf.Tensor, nshots: int,
             tf.reshape(state, shape), is_density_matrix)
         logits = tf.math.log(tf.reshape(probs, (probs_dim,)))
 
-        if nshots * probs_dim < GPU_MEASUREMENT_CUTOFF:
-            # Use default device to perform sampling
+
+        oom_error = tf.python.framework.errors_impl.ResourceExhaustedError
+        try:
             samples_dec = tf.random.categorical(logits[tf.newaxis], nshots,
                                                 dtype=DTYPES.get('DTYPEINT'))[0]
-        else: # pragma: no cover
-            # Force using CPU to perform sampling because if GPU is used
-            # it will cause a `ResourceExhaustedError`
-            if CPU_NAME is None:
+        except oom_error: # pragma: no cover
+            # Force using CPU to perform sampling
+            if not DEVICES['CPU']:
                 raise RuntimeError("Cannot find CPU device to use for sampling.")
-            with tf.device(CPU_NAME):
+            with tf.device(DEVICES['CPU'][0]):
                 samples_dec = tf.random.categorical(logits[tf.newaxis], nshots,
                                                     dtype=DTYPES.get('DTYPEINT'))[0]
         if samples_only:
@@ -502,9 +500,6 @@ def __init__(self, coefficients):
         TensorflowGate.__init__(self)
         self.swap_reset = []
 
-    def _construct_matrix(self):
-        pass
-
     def __call__(self, state: tf.Tensor, is_density_matrix: bool = False
                  ) -> tf.Tensor:
         shape = tuple(state.shape)
@@ -536,7 +531,7 @@ def __call__(self, state: tf.Tensor, is_density_matrix: bool = False
 class TensorflowChannel(TensorflowGate):
 
     def __new__(cls, *args, **kwargs):
-        if BACKEND.get('GATES') == 'custom':
+        if BACKEND.get('GATES') == 'custom': # pragma: no cover
             raise NotImplementedError("Density matrices are not supported by "
                                       "custom operator gates.")
         else:

diff --git a/src/qibo/tensorflow/circuit.py b/src/qibo/tensorflow/circuit.py
@@ -3,10 +3,12 @@
 import numpy as np
 import tensorflow as tf
 from qibo.base import circuit
-from qibo.config import DTYPES
+from qibo.config import DTYPES, DEVICES
 from qibo.tensorflow import measurements
 from qibo.tensorflow import custom_operators as op
 from typing import List, Optional, Tuple, Union
+InitStateType = Union[np.ndarray, tf.Tensor]
+OutputType = Union[tf.Tensor, measurements.CircuitResult]
 
 
 class TensorflowCircuit(circuit.BaseCircuit):
@@ -20,6 +22,13 @@ def __init__(self, nqubits):
         super(TensorflowCircuit, self).__init__(nqubits)
         self._compiled_execute = None
 
+    def _set_nqubits(self, gate):
+        if gate._nqubits is None:
+            with tf.device(DEVICES['DEFAULT']):
+                gate.nqubits = self.nqubits
+        elif gate.nqubits != self.nqubits:
+            super(TensorflowCircuit, self)._set_nqubits(gate)
+
     def _eager_execute(self, state: tf.Tensor) -> tf.Tensor:
         """Simulates the circuit gates in eager mode."""
         for gate in self.queue:
@@ -66,34 +75,9 @@ def using_tfgates(self) -> bool:
         from qibo.tensorflow import gates
         return gates.TensorflowGate == self.gate_module.TensorflowGate
 
-    def execute(self,
-                initial_state: Optional[Union[np.ndarray, tf.Tensor]] = None,
-                nshots: Optional[int] = None,
-                ) -> Union[tf.Tensor, measurements.CircuitResult]:
-        """Propagates the state through the circuit applying the corresponding gates.
-
-        In default usage the full final state vector or density matrix is returned.
-        If the circuit contains measurement gates and `nshots` is given, then
-        the final state is sampled and the samples are returned.
-        Circuit execution uses by default state vectors but switches automatically
-        to density matrices if
-
-        Args:
-            initial_state (np.ndarray): Initial state vector as a numpy array of shape ``(2 ** nqubits,)``
-                or a density matrix of shape ``(2 ** nqubits, 2 ** nqubits)``.
-                A Tensorflow tensor with shape ``nqubits * (2,)`` (or ``2 * nqubits * (2,)`` for density matrices)
-                is also allowed as an initial state but must have the `dtype` of the circuit.
-                If ``initial_state`` is ``None`` the |000...0> state will be used.
-            nshots (int): Number of shots to sample if the circuit contains
-                measurement gates.
-                If ``nshots`` None the measurement gates will be ignored.
-
-        Returns:
-            If ``nshots`` is given and the circuit contains measurements
-                A :class:`qibo.base.measurements.CircuitResult` object that contains the measured bitstrings.
-            If ``nshots`` is ``None`` or the circuit does not contain measurements.
-                The final state vector as a Tensorflow tensor of shape ``(2 ** nqubits,)`` or a density matrix of shape ``(2 ** nqubits, 2 ** nqubits)``.
-        """
+    def _execute(self, initial_state: Optional[InitStateType] = None,
+                 nshots: Optional[int] = None) -> OutputType:
+        """Performs ``circuit.execute`` on specified device."""
         state = self._cast_initial_state(initial_state)
 
         if self.using_tfgates:
@@ -124,8 +108,44 @@ def execute(self,
         return measurements.CircuitResult(
             self.measurement_tuples, self.measurement_gate_result)
 
-    def __call__(self, initial_state: Optional[tf.Tensor] = None,
-                 nshots: Optional[int] = None) -> tf.Tensor:
+    def execute(self, initial_state: Optional[InitStateType] = None,
+                nshots: Optional[int] = None) -> OutputType:
+        """Propagates the state through the circuit applying the corresponding gates.
+
+        In default usage the full final state vector or density matrix is returned.
+        If the circuit contains measurement gates and `nshots` is given, then
+        the final state is sampled and the samples are returned.
+        Circuit execution uses by default state vectors but switches automatically
+        to density matrices if
+
+        Args:
+            initial_state (np.ndarray): Initial state vector as a numpy array of shape ``(2 ** nqubits,)``
+                or a density matrix of shape ``(2 ** nqubits, 2 ** nqubits)``.
+                A Tensorflow tensor with shape ``nqubits * (2,)`` (or ``2 * nqubits * (2,)`` for density matrices)
+                is also allowed as an initial state but must have the `dtype` of the circuit.
+                If ``initial_state`` is ``None`` the |000...0> state will be used.
+            nshots (int): Number of shots to sample if the circuit contains
+                measurement gates.
+                If ``nshots`` is ``None`` the measurement gates will be ignored.
+
+        Returns:
+            If ``nshots`` is given and the circuit contains measurements
+                A :class:`qibo.base.measurements.CircuitResult` object that contains the measured bitstrings.
+            If ``nshots`` is ``None`` or the circuit does not contain measurements.
+                The final state vector as a Tensorflow tensor of shape ``(2 ** nqubits,)`` or a density matrix of shape ``(2 ** nqubits, 2 ** nqubits)``.
+        """
+        oom_error = tf.python.framework.errors_impl.ResourceExhaustedError
+        device = DEVICES['DEFAULT']
+        try:
+            with tf.device(device):
+                return self._execute(initial_state=initial_state, nshots=nshots)
+        except oom_error:
+            raise RuntimeError(f"State does not fit in {device} memory."
+                               "Please switch the execution device to a "
+                               "different one using ``qibo.set_device``.")
+
+    def __call__(self, initial_state: Optional[InitStateType] = None,
+                 nshots: Optional[int] = None) -> OutputType:
         """Equivalent to ``circuit.execute``."""
         return self.execute(initial_state=initial_state, nshots=nshots)