add xpu support

[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
update typos and bug fixes
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
xpu seeding PR1
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
add seeding for pytorch utilities mp_fabric xpu forking xpu multiprocess pytorch add header for xpu rename change to lightning.pytorch
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
Teardown from lightning-xpu (from #PR- 3) From #3
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
add torch.xpu.stream to ddp update docs
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
update _LIGHTNING_XPU_AVAILABLE to _lightning_xpu_available correct fabric imports.py 1. remove xpu.py from _graveyard 2. correct _lightning_xpu_available() usage fix _try_import function not defined issue in fabric add docs
[pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci
Lightning-AI · Oct 3, 2023 · af90eb9 · af90eb9
1 parent 9d7bc82
commit af90eb9
Show file tree

Hide file tree

Showing 28 changed files with 380 additions and 59 deletions.
diff --git a/docs/source-fabric/fundamentals/launch.rst b/docs/source-fabric/fundamentals/launch.rst
@@ -93,8 +93,9 @@ This is essentially the same as running ``python path/to/your/script.py``, but i
       itself and are expected to be parsed there.
 
     Options:
-      --accelerator [cpu|gpu|cuda|mps|tpu]
+      --accelerator [cpu|gpu|cuda|mps|tpu|xpu]
                                       The hardware accelerator to run on.
+                                      Install Lightning-XPU to enable ``xpu``.
       --strategy [ddp|dp|deepspeed]   Strategy for how to run across multiple
                                       devices.
       --devices TEXT                  Number of devices to run on (``int``), which

diff --git a/docs/source-pytorch/common/index.rst b/docs/source-pytorch/common/index.rst
@@ -17,6 +17,7 @@
    ../advanced/model_parallel
    Train on single or multiple GPUs <../accelerators/gpu>
    Train on single or multiple HPUs <../integrations/hpu/index>
+   Train on single or multiple XPUs <../integrations/xpu/index>
    Train on single or multiple IPUs <../accelerators/ipu>
    Train on single or multiple TPUs <../accelerators/tpu>
    Train on MPS <../accelerators/mps>
@@ -168,6 +169,13 @@ How-to Guides
     :col_css: col-md-4
     :height: 180
 
+.. displayitem::
+    :header: Train on single or multiple XPUs
+    :description: Train models faster with XPU accelerators
+    :button_link: ../integrations/xpu/index.html
+    :col_css: col-md-4
+    :height: 180
+
 .. displayitem::
     :header: Train on single or multiple IPUs
     :description: Train models faster with IPU accelerators

diff --git a/docs/source-pytorch/common_usecases.rst b/docs/source-pytorch/common_usecases.rst
@@ -133,6 +133,13 @@ Customize and extend Lightning for things like custom hardware or distributed st
    :button_link: integrations/hpu/index.html
    :height: 100
 
+.. displayitem::
+   :header: Train on single or multiple XPUs
+   :description: Train models faster with XPUs.
+   :col_css: col-md-12
+   :button_link: integrations/xpu/index.html
+   :height: 100
+
 .. displayitem::
    :header: Train on single or multiple IPUs
    :description: Train models faster with IPUs.

diff --git a/docs/source-pytorch/conf.py b/docs/source-pytorch/conf.py
@@ -96,6 +96,11 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     target_dir="docs/source-pytorch/integrations/hpu",
     checkout="tags/1.0.0",
 )
+assist_local.AssistantCLI.pull_docs_files(
+    gh_user_repo="Lightning-AI/lightning-XPU",
+    target_dir="docs/source-pytorch/integrations/xpu",
+    checkout="tags/1.0.0",
+)
 
 if not _FAST_DOCS_DEV:
     fetch_external_assets(
@@ -324,6 +329,7 @@ def _load_py_module(name: str, location: str) -> ModuleType:
     "torchmetrics": ("https://torchmetrics.readthedocs.io/en/stable/", None),
     "graphcore": ("https://docs.graphcore.ai/en/latest/", None),
     "habana": ("https://lightning-ai.github.io/lightning-Habana/", None),
+    "intel-xpu": ("https://lightning-ai.github.io/lightning-XPU/", None),
 }
 
 # -- Options for todo extension ----------------------------------------------

diff --git a/docs/source-pytorch/extensions/accelerator.rst b/docs/source-pytorch/extensions/accelerator.rst
@@ -12,6 +12,7 @@ Currently there are accelerators for:
 - :doc:`TPU <../accelerators/tpu>`
 - :doc:`IPU <../accelerators/ipu>`
 - :doc:`HPU <../integrations/hpu/index>`
+- :doc:`XPU <../integrations/xpu/index>`
 - :doc:`MPS <../accelerators/mps>`
 
 The Accelerator is part of the Strategy which manages communication across multiple devices (distributed communication).
@@ -32,16 +33,16 @@ Create a Custom Accelerator
 .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.
 
 Here is how you create a new Accelerator.
-Let's pretend we want to integrate the fictional XPU accelerator and we have access to its hardware through a library
-``xpulib``.
+Let's pretend we want to integrate the fictional YPU accelerator and we have access to its hardware through a library
+``ypulib``.
 
 .. code-block:: python
 
-    import xpulib
+    import ypulib
 
 
-    class XPUAccelerator(Accelerator):
-        """Support for a hypothetical XPU, optimized for large-scale machine learning."""
+    class YPUAccelerator(Accelerator):
+        """Support for a hypothetical YPU, optimized for large-scale machine learning."""
 
         @staticmethod
         def parse_devices(devices: Any) -> Any:
@@ -52,29 +53,29 @@ Let's pretend we want to integrate the fictional XPU accelerator and we have acc
         @staticmethod
         def get_parallel_devices(devices: Any) -> Any:
             # Here, convert the device indices to actual device objects
-            return [torch.device("xpu", idx) for idx in devices]
+            return [torch.device("ypu", idx) for idx in devices]
 
         @staticmethod
         def auto_device_count() -> int:
             # Return a value for auto-device selection when `Trainer(devices="auto")`
-            return xpulib.available_devices()
+            return ypulib.available_devices()
 
         @staticmethod
         def is_available() -> bool:
-            return xpulib.is_available()
+            return ypulib.is_available()
 
         def get_device_stats(self, device: Union[str, torch.device]) -> Dict[str, Any]:
             # Return optional device statistics for loggers
             return {}
 
 
-Finally, add the XPUAccelerator to the Trainer:
+Finally, add the YPUAccelerator to the Trainer:
 
 .. code-block:: python
 
     from lightning.pytorch import Trainer
 
-    accelerator = XPUAccelerator()
+    accelerator = YPUAccelerator()
     trainer = Trainer(accelerator=accelerator, devices=2)
 
 
@@ -90,28 +91,28 @@ If you wish to switch to a custom accelerator from the CLI without code changes,
 
 .. code-block:: python
 
-    class XPUAccelerator(Accelerator):
+    class YPUAccelerator(Accelerator):
         ...
 
         @classmethod
         def register_accelerators(cls, accelerator_registry):
             accelerator_registry.register(
-                "xpu",
+                "ypu",
                 cls,
-                description=f"XPU Accelerator - optimized for large-scale machine learning.",
+                description=f"YPU Accelerator - optimized for large-scale machine learning.",
             )
 
 Now, this is possible:
 
 .. code-block:: python
 
-    trainer = Trainer(accelerator="xpu")
+    trainer = Trainer(accelerator="ypu")
 
 Or if you are using the Lightning CLI, for example:
 
 .. code-block:: bash
 
-    python train.py fit --trainer.accelerator=xpu --trainer.devices=2
+    python train.py fit --trainer.accelerator=ypu --trainer.devices=2
 
 
 ----------

diff --git a/docs/source-pytorch/glossary/index.rst b/docs/source-pytorch/glossary/index.rst
@@ -18,6 +18,7 @@
    GPU <../accelerators/gpu>
    Half precision <../common/precision>
    HPU <../integrations/hpu/index>
+   XPU <../integrations/xpu/index>
    Inference <../deploy/production_intermediate>
    IPU <../accelerators/ipu>
    Lightning CLI <../cli/lightning_cli>
@@ -159,6 +160,13 @@ Glossary
    :button_link: ../integrations/hpu/index.html
    :height: 100
 
+.. displayitem::
+   :header: XPU
+   :description: Intel® Graphics Cards for faster training
+   :col_css: col-md-12
+   :button_link: ../integrations/xpu/index.html
+   :height: 100
+
 .. displayitem::
    :header: Inference
    :description: Making predictions by applying a trained model to unlabeled examples

diff --git a/docs/source-pytorch/integrations/xpu/index.rst b/docs/source-pytorch/integrations/xpu/index.rst
@@ -0,0 +1,40 @@
+.. _xpu:
+
+Accelerator: XPU training
+=========================
+
+.. raw:: html
+
+    <div class="display-card-container">
+        <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+   :header: Basic
+   :description: Learn the basics of single and multi-XPU core training.
+   :col_css: col-md-4
+   :button_link: basic.html
+   :height: 150
+   :tag: basic
+
+.. displayitem::
+   :header: Intermediate
+   :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+   :col_css: col-md-4
+   :button_link: intermediate.html
+   :height: 150
+   :tag: intermediate
+
+.. displayitem::
+   :header: Advanced
+   :description: Explore state-of-the-art scaling with additional advanced configurations.
+   :col_css: col-md-4
+   :button_link: advanced.html
+   :height: 150
+   :tag: advanced
+
+.. raw:: html
+
+        </div>
+    </div>
diff --git a/docs/source-pytorch/levels/advanced_level_23.rst b/docs/source-pytorch/levels/advanced_level_23.rst
@@ -0,0 +1,37 @@
+:orphan:
+
+######################
+Level 19: Explore XPUs
+######################
+
+Explore Intel® Graphics Cards (XPU) for model scaling.
+
+----
+
+.. raw:: html
+
+    <div class="display-card-container">
+        <div class="row">
+
+.. Add callout items below this line
+
+.. displayitem::
+   :header: Train models on XPUs
+   :description: Learn the basics of single and multi-XPU core training.
+   :col_css: col-md-6
+   :button_link: ../integrations/xpu/basic.html
+   :height: 150
+   :tag: basic
+
+.. displayitem::
+   :header: Optimize models training on XPUs
+   :description: Enable state-of-the-art scaling with advanced mixed-precision settings.
+   :col_css: col-md-6
+   :button_link: ../integrations/xpu/intermediate.html
+   :height: 150
+   :tag: intermediate
+
+.. raw:: html
+
+        </div>
+    </div>
diff --git a/requirements/_integrations/accelerators.txt b/requirements/_integrations/accelerators.txt
@@ -1,3 +1,6 @@
 # validation HPU connectors
 lightning-habana >=0.1.0
 lightning-graphcore >=0.1.0.rc4
+
+# validation XPU connectors
+lightning-xpu >=0.1.0
diff --git a/src/lightning/fabric/accelerators/__init__.py b/src/lightning/fabric/accelerators/__init__.py
@@ -22,3 +22,13 @@
 
 ACCELERATOR_REGISTRY = _AcceleratorRegistry()
 _register_classes(ACCELERATOR_REGISTRY, "register_accelerators", sys.modules[__name__], Accelerator)
+
+from lightning.fabric.utilities.imports import _lightning_xpu_available
+
+_ACCELERATORS_BASE_MODULE = "lightning.fabric.accelerators"
+ACCELERATOR_REGISTRY = _AcceleratorRegistry()
+call_register_accelerators(ACCELERATOR_REGISTRY, _ACCELERATORS_BASE_MODULE)
+if _lightning_xpu_available() and "xpu" not in ACCELERATOR_REGISTRY:
+    from lightning_xpu.fabric import XPUAccelerator
+
+    XPUAccelerator.register_accelerators(ACCELERATOR_REGISTRY)
diff --git a/src/lightning/fabric/cli.py b/src/lightning/fabric/cli.py
@@ -24,12 +24,15 @@
 from lightning.fabric.plugins.precision.precision import _PRECISION_INPUT_STR, _PRECISION_INPUT_STR_ALIAS
 from lightning.fabric.strategies import STRATEGY_REGISTRY
 from lightning.fabric.utilities.device_parser import _parse_gpu_ids
+from lightning.fabric.utilities.imports import _lightning_xpu_available
 
 _log = logging.getLogger(__name__)
 
 _CLICK_AVAILABLE = RequirementCache("click")
 
-_SUPPORTED_ACCELERATORS = ("cpu", "gpu", "cuda", "mps", "tpu")
+_SUPPORTED_ACCELERATORS = ["cpu", "gpu", "cuda", "mps", "tpu"]
+if _lightning_xpu_available():
+    _SUPPORTED_ACCELERATORS.append("xpu")
 
 
 def _get_supported_strategies() -> List[str]:
@@ -148,13 +151,17 @@ def _set_env_variables(args: Namespace) -> None:
 def _get_num_processes(accelerator: str, devices: str) -> int:
     """Parse the `devices` argument to determine how many processes need to be launched on the current machine."""
     if accelerator == "gpu":
-        parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True)
+        parsed_devices = _parse_gpu_ids(devices, include_cuda=True, include_mps=True, include_xpu=True)
     elif accelerator == "cuda":
         parsed_devices = CUDAAccelerator.parse_devices(devices)
     elif accelerator == "mps":
         parsed_devices = MPSAccelerator.parse_devices(devices)
     elif accelerator == "tpu":
         raise ValueError("Launching processes for TPU through the CLI is not supported.")
+    elif accelerator == "xpu":
+        from lightning_xpu.fabric import XPUAccelerator
+
+        parsed_devices = XPUAccelerator.parse_devices(devices)
     else:
         return CPUAccelerator.parse_devices(devices)
     return len(parsed_devices) if parsed_devices is not None else 0