Skip to content
This repository has been archived by the owner on Dec 11, 2024. It is now read-only.

[WIP, DO NOT MERGE] Apple Silicon Support, Polyhedral Compiler, etc. #151

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0460060
[Add] Allocator for MetalTensor
hikettei Nov 27, 2023
b71ab88
Rewriting as many things as possible to introduce polymorphism to the…
hikettei Dec 23, 2023
4f32e82
Added a dependency: cl-metal
hikettei Dec 23, 2023
b0193de
add: cl-waffe2-metal
hikettei Dec 23, 2023
cad83c5
updated qlfile.lock
hikettei Dec 23, 2023
368a1ce
Added a simplest case of metal tensor backend
hikettei Dec 30, 2023
a036789
Added a concept of action, invocation, and session for a jit compiler
hikettei Dec 30, 2023
d2c4f13
Added an automatic tracer of AbstractNode, dedicated to JIT Compiler
hikettei Dec 30, 2023
be5335d
jit.lisp was born
hikettei Dec 30, 2023
7fed6a3
Added an option unrolling save for backward ops
hikettei Dec 30, 2023
ae4971b
Improved the expression of IR displayed in the terminal
hikettei Dec 30, 2023
be17b32
Opt: Simplified the expression of binary ops in order to fuse more op…
hikettei Dec 31, 2023
c020c51
Eliminated ScalarXXX Nodes for simplicity
hikettei Dec 31, 2023
5f3d5a4
Updated the internal expression of polyhedral compiler
hikettei Dec 31, 2023
988bbe6
Added: Scheduler
hikettei Jan 1, 2024
d971dc0
Enhancement: Scheduler for fusing several element-wise operations
hikettei Jan 1, 2024
d735466
Trying to implement the model of Polyhedral
hikettei Jan 2, 2024
2b3bb12
Experimental: Optimizes the memory-locality by Polyhedral Compiler
hikettei Jan 3, 2024
e8ccf55
Experimental: Added Schedulers
hikettei Jan 4, 2024
a0d9360
Update: LispTensor gencode backend
hikettei Jan 4, 2024
0020636
Update: Codegen based LispTensor Backend
hikettei Jan 5, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions cl-waffe2-metal/package.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

(cl:in-package :cl-user)

;; Package of the Metal backend.
;; It :use-s cl-metal together with three cl-waffe2 packages, and exports only
;; the MetalTensor class name.
(defpackage :cl-waffe2/backends.metal
  (:documentation
   "## [package] :cl-waffe2/backends.metal

MPS Backend for cl-waffe2

# Installing cl-metal

```
$ qlot install
```
")
  (:use
   :cl
   :cl-metal
   :cl-waffe2/base-impl
   :cl-waffe2/vm.generic-tensor
   :cl-waffe2/vm.nodes)
  (:export
   #:MetalTensor
   ))

(in-package :cl-waffe2/backends.metal)

;; Step 1: register :metal in *features* (at most once) when cl-metal reports
;; that Metal is usable on this machine.
;;
;; This has to be its own top-level form. The reader resolves #+metal/#-metal
;; while it reads a form, so anything that tests the feature must be read
;; *after* this form has been evaluated.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (unless (member :metal *features*)
    (when (metal-available-p)
      (pushnew :metal *features*))))

;; Step 2: a predicate whose constant result is selected at read time from the
;; :metal feature registered above.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (declaim (inline metal-reject-p))
  (defun metal-reject-p ()
    "Returns T if the :metal feature was present when this definition was read,
NIL otherwise.

NOTE(review): judging by its name this is meant to be used as a :reject-p
predicate; returning T when Metal *is* available looks inverted for that
purpose -- confirm the intended polarity before wiring it into define-impl."
    #+metal(progn t)
    #-metal(progn nil)))

;; When the :metal feature is present, the backend list becomes MetalTensor
;; first and LispTensor second.
#+metal
(setf cl-waffe2/vm.generic-tensor:*using-backend*
      (list 'MetalTensor 'cl-waffe2/backends.lisp:LispTensor))

30 changes: 30 additions & 0 deletions cl-waffe2-metal/tensor.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@

(in-package :cl-waffe2/backends.metal)


;; Allocator, Dependencies, Configuration for MetalTensor
(defclass MetalTensor (cl-waffe2/backends.lisp:LispTensor)
  ()  ; no slots of its own; everything is inherited from LispTensor
  (:documentation
   "## [AbstractTensor] MetalTensor
Provides Metal-Accelerated Operations
"))

(defmethod wf/t:current-backend-state ((backend-name (eql 'MetalTensor)))
  "Returns a short status string for the MetalTensor backend.
Which string is produced is decided at read time by the :metal feature."
  #+metal
  (format nil "Available (~a)" (machine-version))
  #-metal
  (format nil "Not Available"))

;; ~~ Utils for manipulating array pointers ~~~~~~~~~~~~~~~~~~~~~~~~~~~
(defmacro with-tensor-ptr ((bind tensor) &body body)
  "Evaluates BODY with BIND bound to (tensor-vec TENSOR)."
  ;; Per the original note, the storage vector is kept as a simple-array.
  (list* 'let
         (list (list bind (list 'tensor-vec tensor)))
         body))

(defmacro with-tensor-ptrs ((&rest input-forms) &body body)
  "Nests one WITH-TENSOR-PTR per (bind tensor) entry of INPUT-FORMS, the first
entry outermost, around (progn ,@BODY)."
  (reduce #'(lambda (binding inner)
              `(with-tensor-ptr (,(first binding) ,(second binding))
                 ,inner))
          input-forms
          :from-end t
          :initial-value `(progn ,@body)))

102 changes: 102 additions & 0 deletions cl-waffe2-metal/topi/arithmetic.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@

(in-package :cl-waffe2/backends.metal)

;; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
;; Add, Sub, Mul, Div, Move (+ - * / =)
;; ScalarAdd, ScalarSub, ScalarMul, ScalarDiv,
;; Reciprocal
;; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

;; Defines a BLAS-like element-wise metal kernel
;; TODO: add :cache-global option to avoid re-loading overheads
(macrolet ((def-kernel (op name dtype)
             ;; Emits the kernel <name>-<dtype>. For index `id` it applies
             ;;   (op y[y-offset + incy*id]  x[x-offset + incx*id])
             ;; so `x` is only read (:in) and `y` is updated in place (:io).
             (let ((kernel-name (symb name '- dtype)))
               `(define-kernel (,kernel-name :thread-position-in-grid id)
                    (void ((x* ,dtype :in)
                           (x-offset uint :in)
                           (incx uint :in)
                           (y* ,dtype :io)
                           (y-offset uint :in)
                           (incy uint :in)))
                  (,op (aref y (+ y-offset (* incy id)))
                       (aref x (+ x-offset (* incx id)))))))
           (def (op name)
             ;; One Lisp-side dispatcher named NAME, plus one kernel per entry
             ;; of *all-dtype-case*.
             `(progn
                (def-metal-caller ,name (x y) (x x-offset incx y y-offset incy))
                ,@(mapcar #'(lambda (mtype)
                              `(def-kernel ,op ,name ,mtype))
                          *all-dtype-case*))))
  (def setf  metal-move)
  (def incf  metal-add)
  (def decf  metal-sub)
  (def mulcf metal-mul)
  (def divcf metal-div))

(macrolet ((def-kernel (op name dtype)
             ;; Emits the kernel <name>-<dtype>. For index `id` it applies
             ;;   (op x[x-offset + incx*id]  scal)
             ;; i.e. X op scal -> X, with `x` updated in place (:io).
             (let ((kernel-name (symb name '- dtype)))
               `(define-kernel (,kernel-name :thread-position-in-grid id)
                    (void ((x* ,dtype :io)
                           (x-offset uint :in)
                           (incx uint :in)
                           (scal ,dtype :in)))
                  (,op (aref x (+ x-offset (* incx id))) scal))))
           (def (op name)
             ;; One Lisp-side dispatcher named NAME, plus one kernel per entry
             ;; of *all-dtype-case*.
             `(progn
                (def-metal-caller ,name (x) (x x-offset incx scal))
                ,@(mapcar #'(lambda (mtype)
                              `(def-kernel ,op ,name ,mtype))
                          *all-dtype-case*))))
  (def incf  metal-add-scal)
  (def decf  metal-sub-scal)
  (def mulcf metal-mul-scal)
  (def divcf metal-div-scal))

;; Registers a MetalTensor implementation for each two-tensor arithmetic node
;; listed at the bottom. DEF-IMPL takes the node name and the dispatcher
;; function (metal-add, ...) that the generated forward code should call.
(macrolet ((def-impl (node-name op)
`(define-impl
;; A B -> A  (the generated form ends with ,x, i.e. the first input)
(,node-name :device MetalTensor)
:forward ((self x y)
`(progn
,(call-with-view
#'(lambda (x-view y-view)
;; The dispatchers are defined with the parameter list
;;   (dtype n x x-offset incx y y-offset incy)
;; where the kernel reads its `x` and updates its `y`. The node's Y is
;; therefore passed in the kernel's `x` position and the node's X in the
;; kernel's `y` position, so that the node's X receives the result.
`(,',op
,(dtype x)
,(size-of x-view 0)
,y
,(offset-of y-view 0)
,(stride-of y-view 0)
,x
,(offset-of x-view 0)
,(stride-of x-view 0)))
(list x y))
,x)))))
(def-impl AddNode metal-add)
(def-impl SubNode metal-sub)
(def-impl MulNode metal-mul)
(def-impl DivNode metal-div)
(def-impl MoveTensorNode metal-move))

;; Registers a MetalTensor implementation for each tensor-with-scalar node
;; listed at the bottom. DEF-IMPL takes the node name and the dispatcher
;; function (metal-add-scal, ...) that the generated forward code should call.
;; NOTE(review): the same change set removes the CPUTensor implementations of
;; ScalarAdd/ScalarSub/ScalarMul/ScalarDiv -- confirm these node classes still
;; exist before relying on this block.
(macrolet ((def-impl (node-name op)
`(define-impl
;; A scalar -> A  (the generated form ends with ,x, i.e. the tensor input)
(,node-name :device MetalTensor)
:forward ((self x scalar)
`(progn
,(call-with-view
#'(lambda (x-view)
;; Matches the dispatcher parameter list (dtype n x x-offset incx scal);
;; the scalar operand is passed as (tensor-vec scalar).
`(,',op
,(dtype x)
,(size-of x-view 0)
,x
,(offset-of x-view 0)
,(stride-of x-view 0)
(tensor-vec ,scalar)))
(list x))
,x)))))
(def-impl ScalarAdd metal-add-scal)
(def-impl ScalarSub metal-sub-scal)
(def-impl ScalarMul metal-mul-scal)
(def-impl ScalarDiv metal-div-scal))

4 changes: 4 additions & 0 deletions cl-waffe2-metal/topi/mathematical.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

(in-package :cl-waffe2/backends.metal)


41 changes: 41 additions & 0 deletions cl-waffe2-metal/topi/topi.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@

(in-package :cl-waffe2/backends.metal)

(defparameter *all-dtype-case*
  '(float
    int64-t int32-t int16-t int8-t
    uint64-t uint32-t uint16-t uint8-t)
  "Element-type symbols for which a kernel and a dispatch clause are generated.")

(defun dtype->mtype (dtype)
  "Maps the dtype keyword DTYPE (:float, :int32, ...) to the matching
element-type symbol (float, int32-t, ...).
Signals an error for any keyword that is not in the table."
  (declare (type keyword dtype))
  (let ((entry (assoc dtype
                      '((:float  . float)
                        (:int64  . int64-t)
                        (:int32  . int32-t)
                        (:int16  . int16-t)
                        (:int8   . int8-t)
                        (:uint64 . uint64-t)
                        (:uint32 . uint32-t)
                        (:uint16 . uint16-t)
                        (:uint8  . uint8-t)))))
    (if entry
        (cdr entry)
        (error "dtype->mtype: not supported dtype: ~a" dtype))))

(defmacro def-metal-caller (name (&rest tensors) (&rest args))
  "Defines the function NAME with parameters (dtype n ,@ARGS).

Inside it, every variable listed in TENSORS is rebound to its own tensor-vec
(through WITH-TENSOR-PTRS), then (dtype->mtype dtype) selects the kernel
<NAME>-<mtype>, which is invoked through %funcall-metal with ARGS as :args and
n as :kcount."
  (flet ((self-binding (tensor)
           ;; (x x) rebinds x to (tensor-vec x)
           (list tensor tensor))
         (dispatch-clause (mtype)
           `(,mtype
             (%funcall-metal
              (get-kernel ',(symb name '- mtype))
              :args (list ,@args)
              :kcount n))))
    `(defun ,name (dtype n ,@args)
       (with-tensor-ptrs ,(mapcar #'self-binding tensors)
         (case (dtype->mtype dtype)
           ,@(mapcar #'dispatch-clause *all-dtype-case*))))))

(defun mfuncall (function &rest args)
  "Calls FUNCTION with (tensor-vec ARG) of each element of ARGS, passed as
separate positional arguments, and returns whatever FUNCTION returns.

The arguments are spread with APPLY so that this out-of-line definition agrees
with the compiler macro of the same name, which expands into
(funcall FUNCTION (tensor-vec a) (tensor-vec b) ...). Previously the function
version handed FUNCTION one single list instead."
  (apply function (mapcar #'tensor-vec args)))

(define-compiler-macro mfuncall (function &rest args)
  ;; Rewrites (mfuncall f a b ...) into (funcall f (tensor-vec a) (tensor-vec b) ...)
  (list* 'funcall
         function
         (mapcar #'(lambda (arg) (list 'tensor-vec arg))
                 args)))

6 changes: 6 additions & 0 deletions cl-waffe2-metal/utils.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

(in-package :cl-waffe2/backends.metal)

(defun symb (&rest inputs)
  "Concatenates the PRINC representations of INPUTS and interns the resulting
name in the current *package*."
  (let ((name (apply #'concatenate 'string
                     (mapcar #'princ-to-string inputs))))
    (intern name)))

33 changes: 24 additions & 9 deletions cl-waffe2.asd
Original file line number Diff line number Diff line change
@@ -1,6 +1,24 @@

(in-package :cl-user)

(defpackage :cl-waffe2-metal-asd
(:use :cl :asdf :uiop))

(in-package :cl-waffe2-metal-asd)

;; Optional Metal backend; its sources live under cl-waffe2-metal/.
(defsystem :cl-waffe2/metal-extension
  :author "hikettei"
  :licence "MIT"
  :description ""
  :pathname "cl-waffe2-metal"
  :serial t
  ;; package.lisp :use-s cl-waffe2/base-impl, cl-waffe2/vm.generic-tensor and
  ;; cl-waffe2/vm.nodes, and tensor.lisp subclasses
  ;; cl-waffe2/backends.lisp:LispTensor, so cl-waffe2 must be loaded before
  ;; this system; depending on :cl-metal alone fails on a fresh image.
  ;; NOTE(review): make sure :cl-waffe2 never depends on this system in turn,
  ;; otherwise the dependency becomes circular.
  :depends-on (:cl-metal :cl-waffe2)
  ;; Order matters (:serial t): topi/topi uses SYMB from utils at
  ;; macroexpansion time, and topi/arithmetic uses the macros from topi/topi.
  :components ((:file "package")
               (:file "tensor")
               (:file "utils")
               (:file "topi/topi")
               (:file "topi/arithmetic")
               (:file "topi/mathematical")))

(defpackage :cl-waffe2-simd-asd
(:use :cl :asdf :uiop))
Expand All @@ -18,7 +36,6 @@
(:file "shared-object")
(:file "api")))


(defpackage :cl-waffe2-asd
(:use :cl :asdf :uiop))

Expand All @@ -42,17 +59,15 @@
:closer-mop
:trivial-garbage
:cl-waffe2/simd-extension

:cl-environments
:numpy-file-format
:jonathan)
:jonathan
:linear-programming)
;; TODO: Use components and split dependencies.
:components ((:file "threads")

(:file "vm/generic-tensor/package")
(:file "vm/generic-tensor/conditions")


(:file "vm/generic-tensor/default-impls")

;; Load package.lisp first. (since scheduling depends on vm/nodes/package, MoveNodeTensor in base-impl/package)
Expand All @@ -65,14 +80,14 @@
(:file "vm/generic-tensor/dtype")
(:file "vm/lazy-subscript")
(:file "vm/allocation")

;; Iterators
(:file "vm/iterator/package")
(:file "vm/jit")
(:file "vm/generic-tensor/render")
(:file "vm/iterator/range")
(:file "vm/iterator/opt-loops")


(:file "vm/iterator/dsl")
(:file "vm/iterator/polyhedral")

(:file "vm/generic-tensor/cache")
(:file "vm/generic-tensor/utils")
Expand Down Expand Up @@ -130,7 +145,7 @@
(:file "backends/lisp/logical")
(:file "backends/lisp/matrix-ops")
(:file "backends/lisp/lazy")

(:file "backends/lisp/codegen")

(:file "backends/cpu/package")
(:file "backends/cpu/tensor")
Expand Down
1 change: 1 addition & 0 deletions qlfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
git cl-metal git@github.com:hikettei/cl-metal.git
8 changes: 8 additions & 0 deletions qlfile.lock
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
("quicklisp" .
(:class qlot/source/dist:source-dist
:initargs (:distribution "https://beta.quicklisp.org/dist/quicklisp.txt" :%version :latest)
:version "2023-10-21"))
("cl-metal" .
(:class qlot/source/git:source-git
:initargs (:remote-url "git@github.com:hikettei/cl-metal.git")
:version "git-50d776a2b89ba659502ba918ab1131bd5cde951e"))
69 changes: 0 additions & 69 deletions source/backends/cpu/arithmetic.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -148,72 +148,3 @@
(declare (ignore args))
(not *simd-extension-p*)))

;; InverseTensorNode

(defun expand-arithmetic-scalar-form (x x-ptr scalar &key (fname "add"))
  "Returns the result of CALL-WITH-VIEW over X, where each view is turned into
a call form of the function named by (make-fname (dtype x) FNAME :scal t).
The emitted call receives the view size, X-PTR advanced by the view offset,
the view stride, and SCALAR."
  (let ((kernel (make-fname (dtype x) fname :scal t)))
    (flet ((emit-call (x-view)
             `(,kernel
               ,(size-of x-view 0)
               (incf-tensor-ptr ,x ,x-ptr :offset ,(offset-of x-view 0))
               ,(stride-of x-view 0)
               ,scalar)))
      (call-with-view #'emit-call (list x)))))

(defun expand-inv-form (x x-ptr)
  "Returns the result of CALL-WITH-VIEW over X, where each view is turned into
a call form of the function named by (make-fname (dtype x) \"inv\").
The emitted call receives the view size, X-PTR advanced by the view offset,
and the view stride."
  (let ((kernel (make-fname (dtype x) "inv")))
    (flet ((emit-call (x-view)
             `(,kernel
               ,(size-of x-view 0)
               (incf-tensor-ptr ,x ,x-ptr :offset ,(offset-of x-view 0))
               ,(stride-of x-view 0))))
      (call-with-view #'emit-call (list x)))))

;; CPUTensor implementation of InverseTensorNode, guarded by SIMD-EXTENSION-P
;; as its :reject-p predicate. The generated code runs the "inv" form on X and
;; then yields X.
(define-impl (InverseTensorNode :device CPUTensor
                                :reject-p #'simd-extension-p)
  :forward ((self x)
            (let ((ptr-var (gensym "PTR")))
              `(with-tensor-ptrs ((,ptr-var ,x))
                 (locally (declare (optimize (speed 1)))
                   ,(expand-inv-form x ptr-var)
                   ,x)))))
;; ScalarXX Series
;; CPUTensor implementation of ScalarAdd: coerces the scalar's tensor-vec to the
;; Lisp type of X's dtype, runs the "add" scalar form on X, and yields X.
(define-impl (ScalarAdd :device CPUTensor :reject-p #'simd-extension-p)
  :forward ((self x scalar)
            (let ((ptr-var  (gensym "PTR"))
                  (scal-var (gensym "SCAL")))
              `(with-tensor-ptrs ((,ptr-var ,x))
                 (locally (declare (optimize (speed 1)))
                   (let ((,scal-var (coerce (tensor-vec ,scalar)
                                            (dtype->lisp-type ,(dtype x)))))
                     ,(expand-arithmetic-scalar-form x ptr-var scal-var :fname "add")
                     ,x))))))

;; CPUTensor implementation of ScalarSub: coerces the scalar's tensor-vec to the
;; Lisp type of X's dtype, runs the "sub" scalar form on X, and yields X.
(define-impl (ScalarSub :device CPUTensor :reject-p #'simd-extension-p)
  :forward ((self x scalar)
            (let ((ptr-var  (gensym "PTR"))
                  (scal-var (gensym "SCAL")))
              `(with-tensor-ptrs ((,ptr-var ,x))
                 (locally (declare (optimize (speed 1)))
                   (let ((,scal-var (coerce (tensor-vec ,scalar)
                                            (dtype->lisp-type ,(dtype x)))))
                     ,(expand-arithmetic-scalar-form x ptr-var scal-var :fname "sub")
                     ,x))))))

;; CPUTensor implementation of ScalarMul: coerces the scalar's tensor-vec to the
;; Lisp type of X's dtype, runs the "mul" scalar form on X, and yields X.
(define-impl (ScalarMul :device CPUTensor :reject-p #'simd-extension-p)
  :forward ((self x scalar)
            (let ((ptr-var  (gensym "PTR"))
                  (scal-var (gensym "SCAL")))
              `(with-tensor-ptrs ((,ptr-var ,x))
                 (locally (declare (optimize (speed 1)))
                   (let ((,scal-var (coerce (tensor-vec ,scalar)
                                            (dtype->lisp-type ,(dtype x)))))
                     ,(expand-arithmetic-scalar-form x ptr-var scal-var :fname "mul")
                     ,x))))))

;; CPUTensor implementation of ScalarDiv: coerces the scalar's tensor-vec to the
;; Lisp type of X's dtype, runs the "div" scalar form on X, and yields X.
(define-impl (ScalarDiv :device CPUTensor :reject-p #'simd-extension-p)
  :forward ((self x scalar)
            (let ((ptr-var  (gensym "PTR"))
                  (scal-var (gensym "SCAL")))
              `(with-tensor-ptrs ((,ptr-var ,x))
                 (locally (declare (optimize (speed 1)))
                   (let ((,scal-var (coerce (tensor-vec ,scalar)
                                            (dtype->lisp-type ,(dtype x)))))
                     ,(expand-arithmetic-scalar-form x ptr-var scal-var :fname "div")
                     ,x))))))


Loading
Loading